Bladeren bron

DXBC to DXIL Converter + unit tests (#2685)

Includes dxilconv-specific DXIL optimization passes added to opt.exe tool.
Helena Kotas 5 jaren geleden
bovenliggende
commit
a42ffbf491
100 gewijzigde bestanden met toevoegingen van 21305 en 35 verwijderingen
  1. 5 0
      include/dxc/Support/Global.h
  2. 1 1
      lib/HLSL/LLVMBuild.txt
  3. 3 34
      projects/CMakeLists.txt
  4. 33 0
      projects/dxilconv/CMakeLists.txt
  5. 68 0
      projects/dxilconv/include/DxbcConverter.h
  6. 24 0
      projects/dxilconv/include/DxilConvPasses/DxilCleanup.h
  7. 41 0
      projects/dxilconv/include/DxilConvPasses/NormalizeDxil.h
  8. 117 0
      projects/dxilconv/include/DxilConvPasses/ScopeNest.h
  9. 97 0
      projects/dxilconv/include/DxilConvPasses/ScopeNestInfo.h
  10. 1021 0
      projects/dxilconv/include/DxilConvPasses/ScopeNestIterator.h
  11. 53 0
      projects/dxilconv/include/DxilConvPasses/ScopeNestedCFG.h
  12. 2604 0
      projects/dxilconv/include/ShaderBinary/ShaderBinary.h
  13. 35 0
      projects/dxilconv/include/Support/DXIncludes.h
  14. 141 0
      projects/dxilconv/include/Support/DxbcSignatures.h
  15. 42 0
      projects/dxilconv/include/Tracing/CMakeLists.txt
  16. 154 0
      projects/dxilconv/include/Tracing/DxcRuntime.man
  17. 4 0
      projects/dxilconv/lib/CMakeLists.txt
  18. 14 0
      projects/dxilconv/lib/DxbcConverter/CMakeLists.txt
  19. 7332 0
      projects/dxilconv/lib/DxbcConverter/DxbcConverter.cpp
  20. 634 0
      projects/dxilconv/lib/DxbcConverter/DxbcConverterImpl.h
  21. 1053 0
      projects/dxilconv/lib/DxbcConverter/DxbcUtil.cpp
  22. 191 0
      projects/dxilconv/lib/DxbcConverter/DxbcUtil.h
  23. 9 0
      projects/dxilconv/lib/DxilConvPasses/CMakeLists.txt
  24. 1334 0
      projects/dxilconv/lib/DxilConvPasses/DxilCleanup.cpp
  25. 43 0
      projects/dxilconv/lib/DxilConvPasses/InitializePasses.cpp
  26. 182 0
      projects/dxilconv/lib/DxilConvPasses/NormalizeDxil.cpp
  27. 100 0
      projects/dxilconv/lib/DxilConvPasses/ScopeNestInfo.cpp
  28. 1872 0
      projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp
  29. 11 0
      projects/dxilconv/lib/ShaderBinary/CMakeLists.txt
  30. 1246 0
      projects/dxilconv/lib/ShaderBinary/ShaderBinary.cpp
  31. 24 0
      projects/dxilconv/lib/ShaderBinary/ShaderBinaryIncludes.h
  32. 13 0
      projects/dxilconv/test/dxbc2dxil-asm/assemble_dxbc.bat
  33. 35 0
      projects/dxilconv/test/dxbc2dxil-asm/call2.asm
  34. BIN
      projects/dxilconv/test/dxbc2dxil-asm/call2.dxbc
  35. 129 0
      projects/dxilconv/test/dxbc2dxil-asm/call2.ref
  36. 35 0
      projects/dxilconv/test/dxbc2dxil-asm/cs3.asm
  37. BIN
      projects/dxilconv/test/dxbc2dxil-asm/cs3.dxbc
  38. 102 0
      projects/dxilconv/test/dxbc2dxil-asm/cs3.ref
  39. 11 0
      projects/dxilconv/test/dxbc2dxil-asm/cyclecounter.asm
  40. BIN
      projects/dxilconv/test/dxbc2dxil-asm/cyclecounter.dxbc
  41. 44 0
      projects/dxilconv/test/dxbc2dxil-asm/cyclecounter.ref
  42. 93 0
      projects/dxilconv/test/dxbc2dxil-asm/hs3.asm
  43. BIN
      projects/dxilconv/test/dxbc2dxil-asm/hs3.dxbc
  44. 159 0
      projects/dxilconv/test/dxbc2dxil-asm/hs3.ref
  45. 19 0
      projects/dxilconv/test/dxbc2dxil-asm/indexabletemp4.asm
  46. BIN
      projects/dxilconv/test/dxbc2dxil-asm/indexabletemp4.dxbc
  47. 82 0
      projects/dxilconv/test/dxbc2dxil-asm/indexabletemp4.ref
  48. 63 0
      projects/dxilconv/test/dxbc2dxil-asm/indexabletemp6.asm
  49. BIN
      projects/dxilconv/test/dxbc2dxil-asm/indexabletemp6.dxbc
  50. 103 0
      projects/dxilconv/test/dxbc2dxil-asm/indexabletemp6.ref
  51. 11 0
      projects/dxilconv/test/dxbc2dxil/abs1.hlsl
  52. 43 0
      projects/dxilconv/test/dxbc2dxil/abs1.ref
  53. 11 0
      projects/dxilconv/test/dxbc2dxil/abs2.hlsl
  54. 49 0
      projects/dxilconv/test/dxbc2dxil/abs2.ref
  55. 61 0
      projects/dxilconv/test/dxbc2dxil/atomics.hlsl
  56. 169 0
      projects/dxilconv/test/dxbc2dxil/atomics.ref
  57. 32 0
      projects/dxilconv/test/dxbc2dxil/bad_ftoi.hlsl
  58. 30 0
      projects/dxilconv/test/dxbc2dxil/bad_ftoi.ref
  59. 17 0
      projects/dxilconv/test/dxbc2dxil/binary1.hlsl
  60. 56 0
      projects/dxilconv/test/dxbc2dxil/binary1.ref
  61. 11 0
      projects/dxilconv/test/dxbc2dxil/bool1.hlsl
  62. 50 0
      projects/dxilconv/test/dxbc2dxil/bool1.ref
  63. 11 0
      projects/dxilconv/test/dxbc2dxil/bool2.hlsl
  64. 45 0
      projects/dxilconv/test/dxbc2dxil/bool2.ref
  65. 34 0
      projects/dxilconv/test/dxbc2dxil/bufinfo.hlsl
  66. 89 0
      projects/dxilconv/test/dxbc2dxil/bufinfo.ref
  67. 22 0
      projects/dxilconv/test/dxbc2dxil/calc_lod.hlsl
  68. 81 0
      projects/dxilconv/test/dxbc2dxil/calc_lod.ref
  69. 25 0
      projects/dxilconv/test/dxbc2dxil/call1.hlsl
  70. 93 0
      projects/dxilconv/test/dxbc2dxil/call1.ref
  71. 32 0
      projects/dxilconv/test/dxbc2dxil/call3.hlsl
  72. 132 0
      projects/dxilconv/test/dxbc2dxil/call3.ref
  73. 11 0
      projects/dxilconv/test/dxbc2dxil/cast1.hlsl
  74. 35 0
      projects/dxilconv/test/dxbc2dxil/cast1.ref
  75. 11 0
      projects/dxilconv/test/dxbc2dxil/cast2.hlsl
  76. 35 0
      projects/dxilconv/test/dxbc2dxil/cast2.ref
  77. 11 0
      projects/dxilconv/test/dxbc2dxil/cast3.hlsl
  78. 35 0
      projects/dxilconv/test/dxbc2dxil/cast3.ref
  79. 11 0
      projects/dxilconv/test/dxbc2dxil/cast4.hlsl
  80. 35 0
      projects/dxilconv/test/dxbc2dxil/cast4.ref
  81. 11 0
      projects/dxilconv/test/dxbc2dxil/cast5.hlsl
  82. 35 0
      projects/dxilconv/test/dxbc2dxil/cast5.ref
  83. 11 0
      projects/dxilconv/test/dxbc2dxil/cast6.hlsl
  84. 35 0
      projects/dxilconv/test/dxbc2dxil/cast6.ref
  85. 20 0
      projects/dxilconv/test/dxbc2dxil/cbuffer1.50.hlsl
  86. 51 0
      projects/dxilconv/test/dxbc2dxil/cbuffer1.50.ref
  87. 21 0
      projects/dxilconv/test/dxbc2dxil/cbuffer1.51.hlsl
  88. 50 0
      projects/dxilconv/test/dxbc2dxil/cbuffer1.51.ref
  89. 13 0
      projects/dxilconv/test/dxbc2dxil/cbuffer2.50.hlsl
  90. 49 0
      projects/dxilconv/test/dxbc2dxil/cbuffer2.50.ref
  91. 14 0
      projects/dxilconv/test/dxbc2dxil/cbuffer2.51.hlsl
  92. 48 0
      projects/dxilconv/test/dxbc2dxil/cbuffer2.51.ref
  93. 21 0
      projects/dxilconv/test/dxbc2dxil/cbuffer3.50.hlsl
  94. 71 0
      projects/dxilconv/test/dxbc2dxil/cbuffer3.50.ref
  95. 26 0
      projects/dxilconv/test/dxbc2dxil/cbuffer3.51.hlsl
  96. 74 0
      projects/dxilconv/test/dxbc2dxil/cbuffer3.51.ref
  97. 12 0
      projects/dxilconv/test/dxbc2dxil/cmp1.hlsl
  98. 38 0
      projects/dxilconv/test/dxbc2dxil/cmp1.ref
  99. 11 0
      projects/dxilconv/test/dxbc2dxil/constoperand1.hlsl
  100. 30 0
      projects/dxilconv/test/dxbc2dxil/constoperand1.ref

+ 5 - 0
include/dxc/Support/Global.h

@@ -203,6 +203,8 @@ inline void OutputDebugFormatA(_In_ _Printf_format_string_ _Null_terminated_ con
 
 #define DXASSERT_LOCALVAR(local, exp, msg) DXASSERT(exp, msg)
 
+#define DXASSERT_LOCALVAR_NOMSG(local, exp) DXASSERT_LOCALVAR(local, exp, "")
+
 #define DXASSERT_NOMSG(exp) DXASSERT(exp, "")
 
 #define DXVERIFY_NOMSG(exp) DXASSERT(exp, "")
@@ -214,6 +216,8 @@ inline void OutputDebugFormatA(_In_ _Printf_format_string_ _Null_terminated_ con
 
 #define DXASSERT_LOCALVAR(local, exp, msg) DXASSERT(exp, msg)
 
+#define DXASSERT_LOCALVAR_NOMSG(local, exp) DXASSERT_LOCALVAR(local, exp, "")
+
 #define DXVERIFY_NOMSG assert
 
 #define DXASSERT_ARGS(expr, fmt, ...) do { if (!(expr)) { fprintf(stderr, fmt, __VA_ARGS__); assert(false); } } while (0);
@@ -232,6 +236,7 @@ inline void OutputDebugFormatA(_In_ _Printf_format_string_ _Null_terminated_ con
 
 // DXASSERT_LOCALVAR is disabled in free builds, but we keep the local referenced to avoid a warning.
 #define DXASSERT_LOCALVAR(local, exp, msg) do { (void)(local); _Analysis_assume_(exp); } while (0)
+#define DXASSERT_LOCALVAR_NOMSG(local, exp) DXASSERT_LOCALVAR(local, exp, "")
 
 // DXASSERT_NOMSG is disabled in free builds.
 #define DXASSERT_NOMSG(exp) _Analysis_assume_(exp)

+ 1 - 1
lib/HLSL/LLVMBuild.txt

@@ -13,4 +13,4 @@
 type = Library
 name = HLSL
 parent = Libraries
-required_libraries = BitReader Core DxcSupport IPA Support
+required_libraries = BitReader Core DxcSupport IPA Support DXIL

+ 3 - 34
projects/CMakeLists.txt

@@ -1,34 +1,3 @@
-# Discover the projects that use CMake in the subdirectories.
-# Note that explicit cmake invocation is required every time a new project is
-# added or removed.
-file(GLOB entries *)
-foreach(entry ${entries})
-  if(IS_DIRECTORY ${entry} AND EXISTS ${entry}/CMakeLists.txt)
-    if((NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/compiler-rt) AND
-       (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/dragonegg) AND
-       (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/libcxx) AND
-       (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/libcxxabi) AND
-       (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/libunwind))
-      add_subdirectory(${entry})
-    endif()
-  endif()
-endforeach(entry)
-
-# Also add in libc++ and compiler-rt trees if present (and we have
-# a sufficiently recent version of CMake where required).
-if(${LLVM_BUILD_RUNTIME})
-  # MSVC isn't quite working with libc++ yet, disable it until issues are
-  # fixed.
-  if(NOT MSVC)
-    # libc++ uses the libc++abi target names so libc++abi should be added
-    # first.
-    add_llvm_external_project(libcxxabi)
-    add_llvm_external_project(libcxx)
-    add_llvm_external_project(libunwind)
-  endif()
-  if(NOT LLVM_BUILD_EXTERNAL_COMPILER_RT)
-    add_llvm_external_project(compiler-rt)
-  endif()
-endif()
-
-add_llvm_external_project(dragonegg)
+if(WIN32)
+  add_subdirectory(dxilconv)
+endif (WIN32)

+ 33 - 0
projects/dxilconv/CMakeLists.txt

@@ -0,0 +1,33 @@
+set(DXILCONV_PROJECT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+set(DXILCONV_PROJECT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
+
+include_directories(
+  ${LLVM_MAIN_INCLUDE_DIR}
+  ${DXILCONV_PROJECT_SOURCE_DIR}/include
+  ${DXILCONV_PROJECT_BINARY_DIR}/include
+)
+
+macro(add_dxilconv_project_library name)
+  add_llvm_library(${name} ${ARGN})
+#  add_definitions(/D_ITERATOR_DEBUG_LEVEL=0)
+  set_output_directory(${name} ${LLVM_RUNTIME_OUTPUT_INTDIR} ${LLVM_LIBRARY_OUTPUT_INTDIR})  
+  set_target_properties(${name} PROPERTIES FOLDER "Dxilconv libraries")
+endmacro(add_dxilconv_project_library)
+
+macro(add_dxilconv_project_executable name)
+  add_llvm_executable(${name} ${ARGN})
+  set_target_properties(${name} PROPERTIES FOLDER "Dxilconv executables")
+endmacro(add_dxilconv_project_executable)
+
+macro(add_dxilconv_project_test_library name)
+  add_dxilconv_project_library(${name} ${ARGN})
+  set_target_properties(${name} PROPERTIES FOLDER "Dxilconv tests")
+endmacro(add_dxilconv_project_test_library)
+
+
+if(WIN32)
+ add_subdirectory(lib)
+ add_subdirectory(tools)
+ add_subdirectory(unittests)
+ add_subdirectory(include/Tracing)
+endif()

+ 68 - 0
projects/dxilconv/include/DxbcConverter.h

@@ -0,0 +1,68 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxbcConverter.h                                                           //
+// Copyright (C) Microsoft. All rights reserved.                             //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Provides declarations for the DirectX DXBC to DXIL converter component.   //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef __DXBC_CONVERTER__H__
+#define __DXBC_CONVERTER__H__
+
+#ifndef _MSC_VER
+extern "C"
+#endif
+DXC_API_IMPORT HRESULT __stdcall DxcCreateInstance(
+  _In_ REFCLSID   rclsid,
+  _In_ REFIID     riid,
+  _Out_ LPVOID*   ppv
+);
+
+#ifndef _MSC_VER
+extern "C"
+#endif
+DXC_API_IMPORT HRESULT __stdcall DxcCreateInstance2(
+  _In_ IMalloc    *pMalloc,
+  _In_ REFCLSID   rclsid,
+  _In_ REFIID     riid,
+  _Out_ LPVOID*   ppv
+);
+
+struct __declspec(uuid("5F956ED5-78D1-4B15-8247-F7187614A041"))
+IDxbcConverter : public IUnknown {
+  /// Create DXIL container out of DXBC shader blob.
+  virtual HRESULT STDMETHODCALLTYPE Convert(
+    _In_reads_bytes_(DxbcSize) LPCVOID pDxbc,
+    _In_ UINT32 DxbcSize,
+    _In_opt_z_ LPCWSTR pExtraOptions,
+    _Outptr_result_bytebuffer_maybenull_(*pDxilSize) LPVOID *ppDxil,
+    _Out_ UINT32 *pDxilSize,
+    _Outptr_result_maybenull_z_ LPWSTR *ppDiag) = 0;
+
+  /// Create DXIL LLVM module out of DXBC bytecode and DDI I/O signatures.
+  /// This is for driver consumption only.
+  virtual HRESULT STDMETHODCALLTYPE ConvertInDriver(
+    _In_reads_bytes_(pBytecode[1]) const UINT32 *pBytecode,
+    _In_opt_z_ LPCVOID pInputSignature,
+    _In_ UINT32 NumInputSignatureElements,
+    _In_opt_z_ LPCVOID pOutputSignature,
+    _In_ UINT32 NumOutputSignatureElements,
+    _In_opt_z_ LPCVOID pPatchConstantSignature,
+    _In_ UINT32 NumPatchConstantSignatureElements,
+    _In_opt_z_ LPCWSTR pExtraOptions,
+    _Out_ IDxcBlob **ppDxilModule,
+    _Outptr_result_maybenull_z_ LPWSTR *ppDiag) = 0;
+};
+
+__declspec(selectany)
+extern const CLSID CLSID_DxbcConverter = { /* 4900391E-B752-4EDD-A885-6FB76E25ADDB */
+  0x4900391e,
+  0xb752,
+  0x4edd,
+  { 0xa8, 0x85, 0x6f, 0xb7, 0x6e, 0x25, 0xad, 0xdb }
+};
+
+#endif

+ 24 - 0
projects/dxilconv/include/DxilConvPasses/DxilCleanup.h

@@ -0,0 +1,24 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxilCleanup.h                                                             //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Optimization of DXIL after conversion from DXBC.                          //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+
+namespace llvm {
+class PassRegistry;
+class ModulePass;
+
+extern char &DxilCleanupID;
+
+llvm::ModulePass *createDxilCleanupPass();
+
+void initializeDxilCleanupPass(llvm::PassRegistry&);
+
+}

+ 41 - 0
projects/dxilconv/include/DxilConvPasses/NormalizeDxil.h

@@ -0,0 +1,41 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// NormalizeDxil.h                                                           //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Normalize DXIL transformation.                                            //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+#include "llvm/Pass.h"
+
+
+namespace llvm {
+  class Function;
+  class PassRegistry;
+  class FunctionPass;
+
+
+  llvm::FunctionPass *createNormalizeDxilPass();
+  void initializeNormalizeDxilPassPass(llvm::PassRegistry&);
+
+  // The legacy pass manager's analysis pass to normalize dxil ir.
+  class NormalizeDxilPass : public FunctionPass {
+  public:
+    static char ID; // Pass identification, replacement for typeid
+
+    NormalizeDxilPass() : FunctionPass(ID) {
+      initializeNormalizeDxilPassPass(*PassRegistry::getPassRegistry());
+    }
+
+    // Normalize incoming dxil ir.
+    bool runOnFunction(Function &F) override;
+
+    virtual const char *getPassName() const override { return "Normalize Dxil"; }
+
+    void getAnalysisUsage(AnalysisUsage &AU) const override;
+  };
+}

+ 117 - 0
projects/dxilconv/include/DxilConvPasses/ScopeNest.h

@@ -0,0 +1,117 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// ScopeNest.h                                                               //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+//
+// This file holds the type used to represent a scope nest. The
+// ScopeNestEvent is the type used by the iterator to represent the nesting
+// structure in the cfg.
+//
+// The iterator returns tokens of type ScopeNestEvent that describe the
+// structure. A ScopeNestEvent is a pair of a basic block and a scope type.
+// The block may be null depending on the scope type so it should always be
+// checked for null before using.
+//
+// See @ScopeNestIterator.h for more details on the iteration.
+//
+// The element types represent the major "events" that occur when walking a
+// scope nest. The block field corresponds to the basic block where the
+// event occurs. There may not always be a block associated with the event
+// because some events are used just to indicate transitions. For example,
+// with the If_Else and Switch_Case events, the actual else and case blocks
+// will be returned with the next event, which will have its own type indicating
+// the event caused by that block.
+//
+// The event location in the block depends on the scope type. For scope-opening
+// events, the location is at the end of the block. For example, the @If_Begin
+// event occurs at the end of the A block. For scope closing events the event
+// occurs at the top of the block. For example, the @If_End event occurs at
+// the entry to the X block. For events that do not open or close scopes
+// the events generally occur at the bottom of the block. For example, the
+// @Loop_Continue event occurs with the branch at the end of the block.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+
+namespace llvm {
+
+// A single event produced while walking a scope nest: an event type paired
+// with the basic block at which the event occurs. Block is null for events
+// that only mark a transition (see the per-enumerator notes below), so it
+// must be checked before it is dereferenced.
+struct ScopeNestEvent {
+    enum class Type {
+        Invalid,        // Not a valid event.
+        TopLevel_Begin, // Before the first block. Block will be null.
+        Body,           // In the body of a scope. No interesting event.
+        Switch_Begin,   // Begin a switch scope. Block has multi-way branch.
+        Switch_Break,   // Break out of a switch scope. Block may be null.
+        Switch_Case,    // A case will start at the next event. Block will be null.
+        Switch_End,     // End a switch scope. Block is after all switch exits.
+        Loop_Begin,     // Begin a loop scope. Block has one branch leading to loop header.
+        Loop_Continue,  // A "continue" inside a loop. Block has one branch leading to loop latch.
+        Loop_Break,     // A "break" inside a loop. Block has one branch leading to Loop_End block.
+        Loop_End,       // End of loop marker. Block is after the loop (the post loop footer).
+        If_Begin,       // Start of if. Block has branch leading to the two sides of the if.
+        If_Else,        // The else body starts at the next event. Block will be null.
+        If_End,         // The end if marker. Block may be null.
+        TopLevel_End,   // After the last block. Block will be null.
+    };
+
+    typedef const BasicBlock BlockTy; // TODO: make this a template so we can have const and non-const iterators.
+    Type ElementType;
+    BlockTy *Block;
+
+    // Initializers are listed in member declaration order (ElementType, then
+    // Block), which is the order in which they are actually executed.
+    ScopeNestEvent(BlockTy *B, Type T) : ElementType(T), Block(B) {}
+
+    // Returns the sentinel event: null block, Type::Invalid.
+    static ScopeNestEvent Invalid() { return ScopeNestEvent(nullptr, Type::Invalid); }
+
+    // Returns true if this event opens a scope (top level, switch, loop or if).
+    bool IsBeginScope() const {
+        switch (ElementType) {
+        case Type::TopLevel_Begin:
+        case Type::Switch_Begin:
+        case Type::Loop_Begin:
+        case Type::If_Begin:
+            return true;
+        default:
+            return false;
+        }
+    }
+
+    // Returns true if this event closes a scope (top level, switch, loop or if).
+    bool IsEndScope() const {
+        switch (ElementType) {
+        case Type::If_End:
+        case Type::Switch_End:
+        case Type::Loop_End:
+        case Type::TopLevel_End:
+            return true;
+        default:
+            return false;
+        }
+    }
+
+    // Returns the enumerator name of ElementType as a string literal.
+    // Every enumerator is handled, so falling out of the switch means
+    // ElementType holds a value outside the enum; that asserts and yields
+    // "Unknown".
+    const char *GetElementTypeName() const {
+        switch (ElementType) {
+        case Type::Invalid:        return "Invalid";
+        case Type::TopLevel_Begin: return "TopLevel_Begin";
+        case Type::Body:           return "Body";
+        case Type::Switch_Begin:   return "Switch_Begin";
+        case Type::Switch_Case:    return "Switch_Case";
+        case Type::Switch_Break:   return "Switch_Break";
+        case Type::Switch_End:     return "Switch_End";
+        case Type::Loop_Begin:     return "Loop_Begin";
+        case Type::Loop_Continue:  return "Loop_Continue";
+        case Type::Loop_Break:     return "Loop_Break";
+        case Type::Loop_End:       return "Loop_End";
+        case Type::If_Begin:       return "If_Begin";
+        case Type::If_Else:        return "If_Else";
+        case Type::If_End:         return "If_End";
+        case Type::TopLevel_End:   return "TopLevel_End";
+        }
+        assert(false && "unreachable");
+        return "Unknown";
+    }
+
+    // Two events are equal when both the block and the event type match.
+    bool operator==(const ScopeNestEvent &other) const {
+        return Block == other.Block && ElementType == other.ElementType;
+    }
+};
+}

+ 97 - 0
projects/dxilconv/include/DxilConvPasses/ScopeNestInfo.h

@@ -0,0 +1,97 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// ScopeNestInfo.h                                                           //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Implementation of ScopeNestInfo class and related transformation pass.    //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+// Pass to read the scope nest annotations in a cfg and provide a high-level
+// view of the scope nesting structure.
+//
+// The pass follows the same usage pattern as the LLVM LoopInfo pass. We have
+// a ScopeNestInfo class that contains the results of the scope info
+// analysis. The ScopeNestInfoWrapperPass class is the pass implementation
+// that runs the analysis and saves the results so it can be queried by
+// a later pass.
+//
+// This pass requires that the -scopenestedcfg pass has been run prior to
+// running this pass because we rely on the cfg annotations added by the
+// scopenestedcfg pass.
+//
+// This pass is itself a thin wrapper around the ScopeNestIterator pass. The
+// iterator does the heavy lifting and we just cache the results of the
+// iteration here. We keep the iterator separate so that it can be easily
+// run outside the llvm pass infrastructure.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+#include "llvm/Pass.h"
+#include "DxilConvPasses/ScopeNest.h"
+
+
+namespace llvm {
+class Function;
+class PassRegistry;
+class FunctionPass;
+
+
+llvm::FunctionPass *createScopeNestInfoWrapperPass();
+void initializeScopeNestInfoWrapperPassPass(llvm::PassRegistry&);
+
+// Class to hold the results of the scope nest analysis.
+//
+// Provides an iterator to examine the sequence of ScopeNestEvent elements.
+// We could provide a higher-level view of the scope nesting if needed,
+// but that would probably build on the stream of elements anyway.
+//
+// This class is modeled after llvm LoopInfo.
+class ScopeNestInfo {
+public:
+    typedef std::vector<ScopeNestEvent>::const_iterator elements_iterator;
+    typedef iterator_range<elements_iterator> elements_iterator_range;
+
+    elements_iterator elements_begin() { return m_scopeElements.begin(); }
+    elements_iterator elements_end()   { return m_scopeElements.end(); }
+    elements_iterator_range elements(){ return elements_iterator_range(elements_begin(), elements_end()); }
+
+    void Analyze(Function &F);
+    void print(raw_ostream &O) const;
+    void releaseMemory();
+
+private:
+    std::vector<ScopeNestEvent> m_scopeElements;
+
+    raw_ostream &indent(raw_ostream &O, int level, StringRef str) const;
+};
+
+// The legacy pass manager's analysis pass to read scope nest annotation information.
+//
+// This class is modeled after the llvm LoopInfoWrapperPass.
+class ScopeNestInfoWrapperPass : public FunctionPass {
+    ScopeNestInfo SI;
+
+public:
+    static char ID; // Pass identification, replacement for typeid
+
+    ScopeNestInfoWrapperPass() : FunctionPass(ID) {
+        initializeScopeNestInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+    }
+
+    ScopeNestInfo &getScopeNestedInfo() { return SI; }
+    const ScopeNestInfo &getScopeNestedInfo() const { return SI; }
+
+    // Read the scope nest annotation information for a given function.
+    bool runOnFunction(Function &F) override;
+
+    void releaseMemory() override;
+
+    void print(raw_ostream &O, const Module *M = nullptr) const override;
+
+    void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+}

+ 1021 - 0
projects/dxilconv/include/DxilConvPasses/ScopeNestIterator.h

@@ -0,0 +1,1021 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// ScopeNestIterator.h                                                       //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Implementation of ScopeNestIterator class.                                //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+// The ScopeNestIterator class iterates over a cfg that has been annotated with
+// scope markers by the scopenestedcfg pass.
+//
+// The iterator produces a sequence of ScopeNestEvent tokens as it iterates
+// over the cfg. The tokens describe the nesting structure of the cfg and
+// the blocks that correspond to the nesting events. Each block will only be
+// returned once by the iterator.
+//
+// Because each block is only returned once some events do not have an
+// associated block (i.e. it will be nullptr). This is necessary to handle
+// cases where a block has two logical events associated with it. For example,
+// when a block is the start of an else branch but also starts a new nested if
+// scope.
+//
+// For example, for a nested if-else like this:
+//              A
+//             /  \
+//            B    C
+//            |   /  \
+//            |  D    E 
+//            |   \  /
+//            |     F
+//            \     /
+//             \   /
+//               X  
+// We would get an event sequence like this:
+//
+// @TopLevel_Begin (null)
+// @If_Begin       (A)
+//   @Body         (B)
+// @If_Else        (null)
+//   @If_Begin     (C)
+//     @Body       (D)
+//   @If_Else      (null)
+//     @Body       (E)
+//   @If_End       (F)
+// @If_End         (X)
+// @TopLevel_End   (null)
+//
+// See @ScopeNest.h for details on the scope events.
+//
+// Note:
+// This iterator is implemented in a header file with the intention that
+// it will be made into a templated version to support both const and non-const
+// iterators.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+#include "DxilConvPasses/ScopeNestedCFG.h"
+#include "DxilConvPasses/ScopeNest.h"
+#include "dxc/Support/Global.h"
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/CFG.h"
+
+#include <stack>
+
+namespace llvm {
+class Function;
+class BasicBlock;
+
+
+// The ScopeNestIterator class is a heavy-weight iterator that walks the cfg
+// in scope nest order. The iterator keeps a large amount of state while
+// iterating and so it is expensive to copy and compare iterators for equality.
+// The end() iterator has no state so comparing and copying the end is
+// relatively cheap.
+//
+// The iterator provides a standard c++ iterator interface. All the logic is
+// handled by the IteratorState class.
+//
+
+class ScopeNestIterator
+{
+public:
+    typedef ScopeNestEvent::BlockTy Block; // TODO: make this a template.
+
+    static ScopeNestIterator begin(const Function &F) {
+        return ScopeNestIterator(F.getEntryBlock());
+    }
+
+    static ScopeNestIterator end() {
+        return ScopeNestIterator();
+    }
+    
+    ScopeNestEvent& operator*() {
+        DXASSERT_NOMSG(!m_state.IsDone());
+        return m_currentElement;
+    }
+
+    ScopeNestIterator& operator++() {
+        (void)GetNextElement();
+        return *this;
+    }
+
+    bool operator==(const ScopeNestIterator &other) const {
+        return m_state == other.m_state;
+    }
+    
+    bool operator!=(const ScopeNestIterator &other) const {
+        return !(*this == other);
+    }
+
+private: // Interface
+    ScopeNestIterator(Block &entry)
+        : m_state(&entry)
+        , m_currentElement(ScopeNestEvent::Invalid())
+    {
+        // Must advance iterator to first element. Should always succeed.
+        bool ok = GetNextElement();
+        DXASSERT_LOCALVAR_NOMSG(ok, ok);
+    }
+
+    ScopeNestIterator()
+        : m_state(nullptr)
+        , m_currentElement(ScopeNestEvent::Invalid())
+    {
+    }
+
+    bool GetNextElement() {
+        bool ok = m_state.MoveNext();
+        if (ok) {
+            m_currentElement = m_state.GetCurrent();
+        }
+        return ok;
+    }
+
+private: // ScopeNestIterator Implementation
+
+    // BranchAnnotation
+    //
+    // Provides safe access to the scope annotation on the block. Use 
+    // the operator bool() to check if there is an annotation.
+    // e.g. if (BranchAnnotation a = BranchAnnotation::Read(B)) { ... }
+    class BranchAnnotation
+    {
+    public:
+        // Read the "dx.BranchKind" metadata attached to the terminator of B.
+        // Returns an empty annotation (operator bool() == false) when B is
+        // null, when B has no terminator, or when the terminator carries no
+        // such metadata.
+        static BranchAnnotation Read(Block *B) {
+            if (!B) { return BranchAnnotation(); }
+
+            const TerminatorInst *end = B->getTerminator();
+            if (!end) { DXASSERT_NOMSG(false); return BranchAnnotation(); }
+
+            MDNode *md = end->getMetadata("dx.BranchKind");
+            if (!md) { return BranchAnnotation(); }
+
+            // Operand 0 of the node is a constant integer holding the BranchKind value.
+            BranchKind kind = static_cast<BranchKind>(cast<ConstantInt>(cast<ConstantAsMetadata>(md->getOperand(0))->getValue())->getZExtValue());
+            return BranchAnnotation(kind);
+        }
+
+        // Wrap an already known branch kind.
+        BranchAnnotation(BranchKind kind) : Kind(kind) {}
+
+        // True when an annotation is present (kind is not Invalid).
+        operator bool() const { return IsSome(); }
+
+        // Return the branch kind. Asserts that an annotation is present.
+        BranchKind Get() const { DXASSERT_NOMSG(IsSome());  return Kind; }
+
+        bool IsEndIf() const { return Kind == BranchKind::IfEnd; }
+
+        // True for the kinds that MoveToBlock() hands to EnterEndOfScope():
+        // scope ends as well as break/continue/back-edge style branches.
+        bool IsEndScope() const {
+            switch (Kind) {
+            case BranchKind::IfEnd:
+            case BranchKind::LoopBreak:
+            case BranchKind::LoopContinue:
+            case BranchKind::LoopBackEdge:
+            case BranchKind::LoopExit:
+            case BranchKind::SwitchBreak:
+            case BranchKind::SwitchEnd:
+                return true;
+            default:
+                return false;
+            }
+        }
+
+        // True for the kinds that open a new if/loop/switch scope.
+        bool IsBeginScope() const {
+            switch (Kind) {
+            case BranchKind::IfBegin:
+            case BranchKind::IfNoEnd:
+            case BranchKind::LoopBegin:
+            case BranchKind::LoopNoEnd:
+            case BranchKind::SwitchBegin:
+            case BranchKind::SwitchNoEnd:
+                return true;
+            default:
+                return false;
+            }
+        }
+
+        // Translate a branch annotation to the corresponding event type.
+        // The "NoEnd" kinds map to the same event as their "Begin" variant.
+        ScopeNestEvent::Type TranslateToNestType() const {
+            switch (Kind) {
+            case BranchKind::Invalid: return ScopeNestEvent::Type::Invalid;
+
+            case BranchKind::IfBegin: return ScopeNestEvent::Type::If_Begin;
+            case BranchKind::IfEnd:   return ScopeNestEvent::Type::If_End;
+            case BranchKind::IfNoEnd: return ScopeNestEvent::Type::If_Begin;
+
+            case BranchKind::SwitchBegin: return ScopeNestEvent::Type::Switch_Begin;
+            case BranchKind::SwitchEnd:   return ScopeNestEvent::Type::Switch_End;
+            case BranchKind::SwitchNoEnd: return ScopeNestEvent::Type::Switch_Begin;
+            case BranchKind::SwitchBreak: return ScopeNestEvent::Type::Switch_Break;
+
+            case BranchKind::LoopBegin:    return ScopeNestEvent::Type::Loop_Begin;
+            case BranchKind::LoopExit:     return ScopeNestEvent::Type::Loop_End;
+            case BranchKind::LoopNoEnd:    return ScopeNestEvent::Type::Loop_Begin;
+            case BranchKind::LoopBreak:    return ScopeNestEvent::Type::Loop_Break;
+            case BranchKind::LoopContinue: return ScopeNestEvent::Type::Loop_Continue;
+            case BranchKind::LoopBackEdge: return ScopeNestEvent::Type::Body; // End of loop is marked at loop exit.
+            }
+            // Only reached when Kind holds a value outside the enumerators above.
+            DXASSERT(false, "unreachable");
+            return ScopeNestEvent::Type::Invalid;
+        }
+    private:
+        bool IsSome() const { return Kind != BranchKind::Invalid; }
+        // Empty annotation; only Read() creates these.
+        BranchAnnotation() : Kind(BranchKind::Invalid) {}
+        BranchKind Kind;
+    };
+
+    // Scope
+    //
+    // A nested scope. Used as part of the stack state to keep track of what
+    // kind of scopes we have entered but not yet exited.
+    //
+    // Instead of using a class hierarchy we provide scope-specific methods
+    // and validate the scope type to ensure that we only operate on the kind
+    // of scope we expect to see.
+    struct Scope {
+        enum class Type { TopLevel, If, Loop, Switch };
+    public:
+        // Open a scope of the given type that starts at startBlock.
+        // `annotation` is the branch kind that opened the scope. The end and
+        // back-edge blocks are unknown at this point and start out as null.
+        // An If scope must start at a block with exactly two successors.
+        Scope(Type scopeType, Block *startBlock, BranchKind annotation)
+            // Initializers are listed in member declaration order, which is
+            // the order in which they actually run.
+            : m_type(scopeType)
+            , m_startAnnotation(annotation)
+            , m_startBlock(startBlock)
+            , m_endBlock(nullptr)
+            , m_backedge(nullptr)
+        {
+            if (m_type == Type::If) { DXASSERT_NOMSG(startBlock && startBlock->getTerminator()->getNumSuccessors() == 2); }
+        }
+
+        Type GetType() const { return m_type; }
+
+        // Accessors. The type-specific ones assert that the scope has the
+        // matching type and return null until the block has been recorded.
+        Block *GetStartBlock() const        { return m_startBlock; }
+        Block *GetIfEndBlock() const        { return GetEndBlock(Type::If); }
+        Block *GetLoopBackedgeBlock() const { AssertType(Type::Loop); return m_backedge; }
+        Block *GetLoopEndBlock() const      { return GetEndBlock(Type::Loop); }
+        Block *GetSwitchEndBlock() const    { return GetEndBlock(Type::Switch); }
+
+        // Setters. Each asserts the scope type and that a block recorded
+        // earlier is not being replaced by a different one.
+        void SetIfEndBlock(Block *B) {
+            SetEndBlock(Type::If, B);
+        }
+
+        void SetLoopBackedgeBlock(Block *B) {
+            SetBackedgeBlock(Type::Loop, B);
+        }
+
+        void SetLoopEndBlock(Block *B) {
+            SetEndBlock(Type::Loop, B);
+        }
+
+        void SetSwitchEndBlock(Block *B) {
+            SetEndBlock(Type::Switch, B);
+        }
+
+        // Member-wise equality.
+        bool operator==(const Scope &other) const {
+            return  m_type == other.m_type &&
+                    m_startAnnotation == other.m_startAnnotation &&
+                    m_startBlock == other.m_startBlock &&
+                    m_endBlock == other.m_endBlock &&
+                    m_backedge == other.m_backedge;
+        }
+
+    private:
+        Type  m_type;
+        BranchKind m_startAnnotation;
+        Block *m_startBlock;
+        Block *m_endBlock;
+        Block *m_backedge; // only for loop.
+
+        void SetEndBlock(Type expectedType, Block *endBlock) {
+            AssertType(expectedType);
+            AssertUnchanged(m_endBlock, endBlock);
+            m_endBlock = endBlock;
+        }
+
+        void SetBackedgeBlock(Type expectedType, Block *backedge) {
+            AssertType(expectedType);
+            AssertUnchanged(m_backedge, backedge);
+            m_backedge = backedge;
+        }
+
+        // A recorded block may be set again only to the same value.
+        void AssertUnchanged(Block *oldBlock, Block *newBlock) const {
+            DXASSERT((oldBlock == nullptr || oldBlock == newBlock), "block should not change");
+        }
+
+        void AssertType(Type t) const {
+            DXASSERT_NOMSG(t == m_type);
+        }
+
+        Block *GetEndBlock(Type t) const {
+            AssertType(t);
+            return m_endBlock;
+        }
+    };
+
+    // StackState
+    //
+    // Keeps track of the state of exploration for an open scope. Uses a small
+    // state machine to move through the exploration stages. When moving to a
+    // new state it notifies the caller of the new state and any block associated
+    // with the state.
+    //
+    // Transitions:
+    //
+    // Top Level
+    // -------------------------------
+    // Start     -> Top_begin
+    // Top_begin -> Top_body
+    // Top_body  -> Top_end 
+    // Top_end   -> Done
+    //
+    // If
+    // -------------------------------
+    // If_thenbody -> If_else | If_end
+    // If_else     -> If_elsebody
+    // If_elsebody -> If_end
+    //
+    // Loop
+    // -------------------------------
+    // Loop_body     -> Loop_backedge
+    // Loop_backedge -> Loop_end
+    //
+    // Switch
+    // -------------------------------
+    // Switch_begin -> Switch_case
+    // Switch_case  -> Switch_body
+    // Switch_body  -> Switch_break
+    // Switch_break -> Switch_case | Switch_end
+    //
+    //
+    // Terminal States:
+    // Done, Switch_end, Loop_end, If_end
+    // 
+    class StackState {
+    public:
+        enum State {
+            // Initial top level state before emitting the Top_begin token.
+            Start,
+
+            // If
+            If_thenbody,   // Exploring the true branch of the if.
+            If_else,       // Transitioning from true to false branch.
+            If_elsebody,   // Exploring the false branch of the if.
+            If_end,        // Finished exploring the if.
+
+            // Loop
+            Loop_body,     // Exploring the loop body.
+            Loop_backedge, // On the loop latch block (branch to loop header).
+            Loop_end,      // Finished exploring the loop.
+
+            // Switch
+            Switch_begin,  // Start of switch before entering any case.
+            Switch_case,   // Starting a new case.
+            Switch_body,   // Exploring a case body.
+            Switch_break,  // Break from a case.
+            Switch_end,    // Finished exploring the switch.
+
+            // Top level
+            Top_begin,     // Before exploring the first block.
+            Top_body,      // Exploring the body of the function.
+            Top_end,       // After exploring all blocks.
+
+            // Final state after top level is popped.
+            Done
+        };
+
+        // Create the exploration state for a newly opened scope. The initial
+        // state is selected from the scope type. `edge` is the index of the
+        // successor of the scope's start block that is currently explored.
+        StackState(Scope scope, unsigned edge)
+            : m_scope(scope)
+            , m_edgeNumber(edge)
+            , m_state(Done) // Always overwritten below for known scope types; keeps
+                            // m_state defined if the default branch is ever taken.
+        {
+            switch (scope.GetType()) {
+            case Scope::Type::If:       m_state = If_thenbody; break;
+            case Scope::Type::Loop:     m_state = Loop_body; break;
+            case Scope::Type::Switch:   m_state = Switch_begin; break;
+            case Scope::Type::TopLevel: m_state = Start; break;
+            default:
+                DXASSERT_NOMSG(false);
+            }
+        }
+
+        Scope &GetScope()             { return m_scope; }
+        const Scope &GetScope() const { return m_scope; }
+
+        // Result of a state transition: the state that was entered and the
+        // block associated with it (nullptr when there is none).
+        struct StateTransition { State state; Block *block; };
+
+        // Transition this stack element to the next state and return associated block.
+        StateTransition MoveToNextState() {
+            Block *block = nullptr;
+            switch (m_state) {
+            // IF
+            case If_thenbody: {
+                // See if we have an else body or not.
+                // The else body is missing when:
+                //   Case 1: Successor block is the found endif block.
+                //   Case 2: Endif block was not found and successor is marked as an endif block.
+                Block *succ = GetNextSuccessor();
+                BranchAnnotation annotation = BranchAnnotation::Read(succ);
+
+                const bool succMatchesFoundEndIf = (succ == m_scope.GetIfEndBlock());
+                const bool succIsMarkedAsEndIf   = (m_scope.GetIfEndBlock() == nullptr && annotation && annotation.Get() == BranchKind::IfEnd);
+                const bool succIsEndif = succMatchesFoundEndIf || succIsMarkedAsEndIf;
+
+                if (succIsEndif) {
+                    m_state = If_end;
+                    block = succ;
+                }
+                else {
+                    m_state = If_else;
+                    block = nullptr;
+                }
+                break;
+            }
+            case If_else:
+                m_state = If_elsebody;
+                block   = MoveToNextSuccessor();
+                break;
+            case If_elsebody:
+                m_state = If_end;
+                block   = m_scope.GetIfEndBlock();
+                break;
+
+            // LOOP
+            case Loop_body:
+                m_state = Loop_backedge;
+                block   = m_scope.GetLoopBackedgeBlock();
+                break;
+            case Loop_backedge:
+                m_state = Loop_end;
+                block   = m_scope.GetLoopEndBlock();
+                break;
+
+            // SWITCH
+            case Switch_begin:
+                m_state = Switch_case;
+                block = nullptr;
+                break;
+            case Switch_case:
+                block = GetCurrentSuccessor();
+                m_state = Switch_body;
+                break;
+            case Switch_body:
+                m_state = Switch_break;
+                block = nullptr;
+                break;
+            case Switch_break:
+                block = MoveToNextUniqueSuccessor();
+                if (block) {
+                    m_state = Switch_case;
+                    block = nullptr; // will resume after emitting case marker.
+                }
+                else {
+                    m_state = Switch_end;
+                    block   = m_scope.GetSwitchEndBlock();
+                }
+                break;
+
+            // TOP LEVEL
+            case Start:
+                m_state = Top_begin;
+                block = nullptr;
+                break;
+
+            case Top_begin:
+                m_state = Top_body;
+                block = m_scope.GetStartBlock();
+                break;
+
+            case Top_body:
+                m_state = Top_end;
+                block = nullptr;
+                break;
+
+            case Top_end:
+                m_state = Done;
+                block = nullptr;
+                break;
+
+            // INVALID
+            // The stack state should already be popped because there is no next state.
+            case If_end:
+            case Switch_end:
+            case Loop_end:
+            case Done:
+            default:
+                DXASSERT_NOMSG(false);
+            }
+
+            return { m_state, block };
+        }
+
+        // Member-wise equality.
+        bool operator==(const StackState& other) const {
+            return  m_scope == other.m_scope &&
+                    m_edgeNumber == other.m_edgeNumber &&
+                    m_state == other.m_state;
+        }
+
+    private:
+        Scope m_scope;
+        unsigned m_edgeNumber; // Index of the start block's successor being explored.
+        State m_state;
+
+    private:
+        // Return next successor or nullptr if no more successors need to be explored.
+        // Does not modify current edge number.
+        Block *GetNextSuccessor() {
+            return GetSuccessor(m_edgeNumber+1);
+        }
+
+        // Return the next successor and, if there is one, increment the edge number.
+        Block *MoveToNextSuccessor() {
+            Block *succ = GetNextSuccessor();
+            if (succ) {
+                ++m_edgeNumber;
+            }
+            return succ;
+        }
+
+        // Get the successor we are currently set to explore.
+        Block *GetCurrentSuccessor() {
+            return GetSuccessor(m_edgeNumber);
+        }
+
+        // Get successor block or nullptr if there is no such successor.
+        Block *GetSuccessor(unsigned succNumber) {
+            Block *const scopeStartBlock = m_scope.GetStartBlock();
+            Block *succ = nullptr;
+            if (scopeStartBlock && scopeStartBlock->getTerminator()) {
+                if (succNumber < scopeStartBlock->getTerminator()->getNumSuccessors()) {
+                    succ_const_iterator succs = succ_begin(scopeStartBlock);
+                    std::advance(succs, succNumber);
+                    succ = *succs;
+                }
+            }
+            return succ;
+        }
+
+        // Move to the next successor that does not match a previous successor.
+        // Needed to avoid visiting a block multiple times in a switch when
+        // multiple cases point to the same block.
+        // Returns the successor moved to, or nullptr (leaving the edge number
+        // unchanged) when every remaining successor was already visited.
+        Block *MoveToNextUniqueSuccessor() {
+            Block *succ = nullptr;
+
+            SmallPtrSet<Block *, 8> visited;
+            Block *const scopeStartBlock = m_scope.GetStartBlock();
+
+            if (scopeStartBlock && scopeStartBlock->getTerminator()) {
+                succ_const_iterator succs = succ_begin(scopeStartBlock);
+                succ_const_iterator succsEnd = succ_end(scopeStartBlock);
+                const unsigned nextEdgeNumber = m_edgeNumber + 1;
+                unsigned edge = 0;
+                // Mark the current successor and all earlier ones as visited.
+                for (; succs != succsEnd && edge < nextEdgeNumber; ++succs, ++edge) {
+                    visited.insert(*succs);
+                }
+                DXASSERT_NOMSG(succs == succsEnd || edge == nextEdgeNumber);
+
+                // Look for next unvisited edge.
+                for (; succs != succsEnd; ++succs, ++edge) {
+                    if (!visited.count(*succs)) {
+                        break;
+                    }
+                }
+
+                // If we found an edge before the end then move to it.
+                if (succs != succsEnd) {
+                    DXASSERT_NOMSG(edge < scopeStartBlock->getTerminator()->getNumSuccessors());
+                    succ = *succs;
+                    m_edgeNumber = edge;
+                }
+            }
+
+            return succ;
+        }
+
+    };
+
+    // ScopeStack
+    //
+    // A stack to hold state information about scopes that are under exploration.
+    //
+    class ScopeStack {
+    public:
+        // True when no scope is open.
+        bool Empty() const { return m_stack.empty(); }
+
+        // Drop every open scope.
+        void Clear() { m_stack.clear(); }
+
+        // Open a new innermost scope; exploration starts at edge 0.
+        void PushScope(const Scope &scope) {
+            const unsigned firstEdge = 0;
+            m_stack.push_back(StackState(scope, firstEdge));
+        }
+
+        // Close the innermost scope. The stack must not be empty.
+        void PopScope() {
+            DXASSERT_NOMSG(!Empty());
+            m_stack.pop_back();
+        }
+
+        // Innermost open scope. The stack must not be empty.
+        Scope &Top() {
+            DXASSERT_NOMSG(!Empty());
+            StackState &innermost = m_stack.back();
+            return innermost.GetScope();
+        }
+
+        const Scope &Top() const {
+            DXASSERT_NOMSG(!Empty());
+            const StackState &innermost = m_stack.back();
+            return innermost.GetScope();
+        }
+
+        // Step the state machine of the innermost scope and report the
+        // transition it made. The stack must not be empty.
+        StackState::StateTransition AdvanceTopOfStack() {
+            DXASSERT_NOMSG(!Empty());
+            StackState &innermost = m_stack.back();
+            return innermost.MoveToNextState();
+        }
+
+        // Innermost open scope of a particular type. A scope of that type is
+        // expected to exist (asserted in FindInnermost).
+        Scope &FindInnermostLoop()   { return FindInnermost(Scope::Type::Loop); }
+        Scope &FindInnermostIf()     { return FindInnermost(Scope::Type::If); }
+        Scope &FindInnermostSwitch() { return FindInnermost(Scope::Type::Switch); }
+
+        // Equality is used to compare against the "end" state in iterator
+        // loops, so the cheap size comparison is done first to make the
+        // common not-equal case fast.
+        bool operator==(const ScopeStack& other) const {
+            if (m_stack.size() != other.m_stack.size()) {
+                return false;
+            }
+            return m_stack == other.m_stack;
+        }
+
+    private:
+        typedef std::vector<StackState> Stack;
+        Stack m_stack;
+
+        // Search from the top of the stack downwards for the first scope
+        // whose type matches.
+        Scope &FindInnermost(Scope::Type type) {
+            auto hasWantedType = [type](const StackState &s) {
+                return s.GetScope().GetType() == type;
+            };
+            Stack::reverse_iterator found =
+                std::find_if(m_stack.rbegin(), m_stack.rend(), hasWantedType);
+            DXASSERT_NOMSG(found != m_stack.rend());
+            return found->GetScope();
+        }
+    };
+
+    // IteratorState
+    //
+    // Keeps track of all the current state of the iteration. The iterator state
+    // works as follows.
+    //
+    // We keep a current event that describes the most recent event returned by
+    // the iterator. To advance the iterator we look at whether we have a valid
+    // block associated with the event. If we do then we keep exploring from
+    // that block. If there is no block (i.e. it is nullptr) then we explore from
+    // the top element of the scope stack.
+    //
+    // The scope stack is used to keep track of the nested scopes. The stack elements
+    // are a little state machine that keep track of what the next action should be
+    // when exploring from the stack. The last action is the "end scope" action which
+    // tells us we should pop the scope from the stack.
+    class IteratorState {
+    public:
+        // Start a traversal at `entry` by pushing a top-level scope for it.
+        // A null entry produces a state that is already done (the "end" state).
+        IteratorState(Block *entry)
+            : m_current(ScopeNestEvent::Invalid())
+            , m_stack()
+        {
+            if (entry) {
+                m_stack.PushScope(Scope(Scope::Type::TopLevel, entry, BranchKind::Invalid));
+            }
+            else {
+                SetDone();
+            }
+        }
+
+        // Most recent event produced by MoveNext().
+        ScopeNestEvent GetCurrent() const
+        {
+            return m_current;
+        }
+
+        // Move to the next event.
+        // Return true if there is a new valid event or false if there are no more events.
+        bool MoveNext()
+        {
+            if (IsDone())
+            {
+                return false;
+            }
+
+            // Events without a block are continued from the scope stack;
+            // events with a block are continued from that block.
+            if (m_current.Block == nullptr)
+            {
+                MoveFromTopOfStack();
+            }
+            else
+            {
+                MoveFromCurrentBlock();
+            }
+            return !IsDone();
+        }
+
+        // Done means: no open scopes and no block left to continue from.
+        bool IsDone() const {
+            return m_stack.Empty() && m_current.Block == nullptr;
+        }
+
+        bool operator==(const IteratorState &other) const {
+            return m_current == other.m_current &&
+                   m_stack   == other.m_stack;
+        }
+
+    private:
+        ScopeNestEvent m_current;
+        ScopeStack m_stack;
+
+    private:
+        // Put the state into the done condition.
+        void SetDone() {
+            m_stack.Clear();
+            m_current = ScopeNestEvent::Invalid();
+            DXASSERT_NOMSG(IsDone());
+        }
+
+        // Record the current event. If the block carries an annotation, the
+        // event type must agree with it.
+        void SetCurrent(ScopeNestEvent::Type T, Block *B) {
+            m_current.ElementType = T;
+            m_current.Block = B;
+
+            if (B) {
+                BranchAnnotation annotation = BranchAnnotation::Read(B);
+                if (annotation) {
+                    DXASSERT_NOMSG(annotation.TranslateToNestType() == T);
+                }
+            }
+        }
+
+        // Advance the state machine on top of the scope stack and turn the
+        // resulting transition into the current event. Terminal states pop
+        // the scope.
+        void MoveFromTopOfStack() {
+            DXASSERT_NOMSG(!m_stack.Empty());
+            StackState::StateTransition next = m_stack.AdvanceTopOfStack();
+            switch (next.state) {
+            case StackState::If_else:
+                DXASSERT_NOMSG(next.block == nullptr);
+                SetCurrent(ScopeNestEvent::Type::If_Else, next.block);
+                break;
+            case StackState::If_elsebody:
+                EnterScopeBodyFromStack(next.block);
+                break;
+            case StackState::If_end:
+                m_stack.PopScope();
+                SetCurrent(ScopeNestEvent::Type::If_End, next.block);
+                break;
+
+            case StackState::Loop_backedge:
+                SetCurrent(BranchAnnotation(BranchKind::LoopBackEdge).TranslateToNestType(), next.block);
+                break;
+            case StackState::Loop_end:
+                m_stack.PopScope();
+                SetCurrent(ScopeNestEvent::Type::Loop_End, next.block);
+                break;
+
+            case StackState::Switch_case:
+                DXASSERT_NOMSG(next.block == nullptr);
+                SetCurrent(ScopeNestEvent::Type::Switch_Case, next.block);
+                break;
+
+            case StackState::Switch_body:
+                EnterScopeBodyFromStack(next.block);
+                break;
+
+            case StackState::Switch_break:
+                DXASSERT_NOMSG(next.block == nullptr);
+                SetCurrent(ScopeNestEvent::Type::Switch_Break, next.block);
+                break;
+
+            case StackState::Switch_end:
+                m_stack.PopScope();
+                SetCurrent(ScopeNestEvent::Type::Switch_End, next.block);
+                break;
+
+            case StackState::Top_begin:
+                DXASSERT_NOMSG(next.block == nullptr);
+                SetCurrent(ScopeNestEvent::Type::TopLevel_Begin, next.block);
+                break;
+
+            case StackState::Top_body:
+                EnterScopeBodyFromStack(next.block);
+                break;
+
+            case StackState::Top_end:
+                DXASSERT_NOMSG(next.block == nullptr);
+                SetCurrent(ScopeNestEvent::Type::TopLevel_End, next.block);
+                break;
+
+            case StackState::Done:
+                m_stack.PopScope();
+                SetDone();
+                break;
+
+            default:
+                DXASSERT_NOMSG(false);
+            }
+        }
+
+        // Begin exploring a scope body at block B as directed by the stack.
+        void EnterScopeBodyFromStack(Block *B)
+        {
+            DXASSERT_NOMSG(B);
+            BranchAnnotation annotation = BranchAnnotation::Read(B);
+            if (annotation) {
+                // Make sure we are not entering a scope end because that will cause
+                // us to move from the stack again. Indicates some problem with the
+                // state transition.
+                BranchKind Kind = annotation.Get();
+                DXASSERT_LOCALVAR_NOMSG(Kind, Kind != BranchKind::IfEnd &&
+                       Kind != BranchKind::LoopBackEdge &&
+                       Kind != BranchKind::LoopExit &&
+                       Kind != BranchKind::SwitchEnd);
+            }
+            MoveToBlock(B);
+        }
+
+        // Continue from the block of the current event.
+        void MoveFromCurrentBlock() {
+            DXASSERT_NOMSG(m_current.Block && m_current.Block->getTerminator());
+            BranchAnnotation annotation = BranchAnnotation::Read(m_current.Block);
+
+            if (annotation) {
+                MoveFromAnnotatedBlock(annotation.Get());
+            }
+            else {
+                MoveFromNonAnnotatedBlock();
+            }
+        }
+
+        void MoveFromAnnotatedBlock(BranchKind annotation) {
+            switch (annotation) {
+            // Already entered a new scope.
+            case BranchKind::IfBegin:
+            case BranchKind::IfNoEnd:
+            case BranchKind::LoopBegin:
+            case BranchKind::LoopNoEnd:
+                DXASSERT(m_current.Block->getTerminator()->getNumSuccessors() >= 1,  "scope entry should have a successor");
+                MoveToFirstSuccessor();
+                break;
+
+            // Start switch. Need to emit first case element from stack.
+            case BranchKind::SwitchBegin:
+            case BranchKind::SwitchNoEnd:
+                MoveFromTopOfStack();
+                break;
+
+            // Already exited an old scope.
+            case BranchKind::IfEnd:
+            case BranchKind::SwitchEnd:
+            case BranchKind::LoopExit:
+                DXASSERT(m_current.Block->getTerminator()->getNumSuccessors() <= 1, "scope exit should not have multiple successors");
+                MoveToFirstSuccessor();
+                break;
+
+            // Keep exploring in same scope.
+            case BranchKind::SwitchBreak:
+            case BranchKind::LoopBreak:
+            case BranchKind::LoopContinue:
+            case BranchKind::LoopBackEdge:
+                MoveFromTopOfStack();
+                break;
+
+            default: DXASSERT_NOMSG(false);
+            }
+        }
+
+        void MoveFromNonAnnotatedBlock() {
+            DXASSERT(m_current.Block->getTerminator()->getNumSuccessors() <= 1,  "multi-way branch should be annotated");
+            MoveToFirstSuccessor();
+        }
+
+        void MoveToFirstSuccessor() {
+            // No successors to explore. Continue from current scope.
+            if (!m_current.Block->getTerminator()->getNumSuccessors()) {
+                DXASSERT_NOMSG(isa<ReturnInst>(m_current.Block->getTerminator()));
+                MoveFromTopOfStack();
+                return;
+            }
+
+            // Get first successor block.
+            Block *succ = *succ_const_iterator(m_current.Block->getTerminator());
+            MoveToBlock(succ);
+        }
+
+        // Make B the next block: dispatch on its annotation to close a scope,
+        // open a scope, or report a plain body block.
+        void MoveToBlock(Block *B) {
+            // Annotated successor.
+            if (BranchAnnotation annotation = BranchAnnotation::Read(B))
+            {
+                if (annotation.IsEndScope()) {
+                    EnterEndOfScope(B, annotation.Get());
+                }
+                else {
+                    DXASSERT_NOMSG(annotation.IsBeginScope());
+                    StartNewScope(B, annotation.Get());
+                }
+            }
+            // Non-Annotated successor.
+            else {
+                SetCurrent(ScopeNestEvent::Type::Body, B);
+            }
+        }
+
+        // Visit the end of scope node from a predecessor we have already explored.
+        // True scope ends are recorded on the matching open scope and then
+        // emitted via the stack; break/continue blocks record their unique
+        // successor as the scope's end/back-edge block and are emitted directly.
+        void EnterEndOfScope(Block *endOfScopeBlock, BranchKind endofScopeKind) {
+            switch (endofScopeKind) {
+            case BranchKind::IfEnd: {
+                Scope &ifScope = m_stack.FindInnermostIf();
+                ifScope.SetIfEndBlock(endOfScopeBlock);
+                MoveFromTopOfStack();
+                break;
+            }
+
+            case BranchKind::LoopBackEdge: {
+                Scope &loopScope = m_stack.FindInnermostLoop();
+                loopScope.SetLoopBackedgeBlock(endOfScopeBlock);
+                MoveFromTopOfStack();
+                break;
+            }
+
+            case BranchKind::LoopExit: {
+                Scope &loopScope = m_stack.FindInnermostLoop();
+                loopScope.SetLoopEndBlock(endOfScopeBlock);
+                MoveFromTopOfStack();
+                break;
+            }
+
+            case BranchKind::SwitchEnd: {
+                Scope &switchScope = m_stack.FindInnermostSwitch();
+                switchScope.SetSwitchEndBlock(endOfScopeBlock);
+                MoveFromTopOfStack();
+                break;
+            }
+
+            case BranchKind::LoopBreak: {
+                Scope &loopScope = m_stack.FindInnermostLoop();
+                loopScope.SetLoopEndBlock(endOfScopeBlock->getUniqueSuccessor());
+                SetCurrent(ScopeNestEvent::Type::Loop_Break, endOfScopeBlock);
+                break;
+            }
+
+            case BranchKind::LoopContinue: {
+                Scope &loopScope = m_stack.FindInnermostLoop();
+                loopScope.SetLoopBackedgeBlock(endOfScopeBlock->getUniqueSuccessor());
+                SetCurrent(ScopeNestEvent::Type::Loop_Continue, endOfScopeBlock);
+                break;
+            }
+
+            case BranchKind::SwitchBreak: {
+                Scope &switchScope = m_stack.FindInnermostSwitch();
+                switchScope.SetSwitchEndBlock(endOfScopeBlock->getUniqueSuccessor());
+                SetCurrent(ScopeNestEvent::Type::Switch_Break, endOfScopeBlock);
+                break;
+            }
+
+            default:
+                DXASSERT_NOMSG(false);
+            }
+        }
+
+        // Emit the begin event for the scope opened by startOfScopeBlock and
+        // push the scope onto the stack.
+        void StartNewScope(Block *startOfScopeBlock, BranchKind startOfScopeKind) {
+            Scope::Type scopeType;
+            ScopeNestEvent::Type nestType;
+            switch (startOfScopeKind) {
+            case BranchKind::IfBegin:
+            case BranchKind::IfNoEnd:
+                scopeType = Scope::Type::If;
+                nestType  = ScopeNestEvent::Type::If_Begin;
+                break;
+            case BranchKind::LoopBegin:
+            case BranchKind::LoopNoEnd:
+                scopeType = Scope::Type::Loop;
+                nestType  = ScopeNestEvent::Type::Loop_Begin;
+                break;
+            case BranchKind::SwitchBegin:
+            case BranchKind::SwitchNoEnd:
+                scopeType = Scope::Type::Switch;
+                nestType  = ScopeNestEvent::Type::Switch_Begin;
+                break;
+            default:
+                // Not a scope-begin kind. This is a logic error (or a branch
+                // kind value outside the enum). With asserts disabled, falling
+                // through would read scopeType/nestType uninitialized, so stop
+                // the iteration instead.
+                DXASSERT_NOMSG(false);
+                SetDone();
+                return;
+            }
+
+            SetCurrent(nestType, startOfScopeBlock);
+            m_stack.PushScope(Scope(scopeType, startOfScopeBlock, startOfScopeKind));
+        }
+
+    };
+
+private: // Members
+    IteratorState m_state;
+    ScopeNestEvent m_currentElement;
+};
+
+}

+ 53 - 0
projects/dxilconv/include/DxilConvPasses/ScopeNestedCFG.h

@@ -0,0 +1,53 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// ScopeNestedCFG.h                                                          //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Pass that converts a reducible CFG into scope-nested CFG.                 //
+// The pass expects that the following passes have been run                  //
+// right before the pass is invoked:                                         //
+//   -simplifycfg                                                            //
+//   -loop-simplify                                                          //
+//   -reg2mem_hlsl                                                           //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+
+
+namespace llvm {
+class Module;       // forward declarations only; no LLVM headers are included here
+class Function;
+class PassRegistry;
+class FunctionPass;
+
+// Pass described in the file header: converts a reducible CFG into scope-nested CFG.
+llvm::FunctionPass *createScopeNestedCFGPass();
+void initializeScopeNestedCFGPass(llvm::PassRegistry&);
+
+llvm::FunctionPass *createLoopSimplifyFunctionPass(); // NOTE(review): presumably a FunctionPass form of -loop-simplify; confirm
+void initializeLoopSimplifyFunctionPass(llvm::PassRegistry&);
+// Branch tags, grouped below by construct (if / switch / loop).
+enum class BranchKind {
+  Invalid = 0,  // the zero value
+
+  IfBegin,
+  IfEnd,
+  IfNoEnd,
+
+  SwitchBegin,
+  SwitchEnd,
+  SwitchNoEnd,
+  SwitchBreak,
+
+  LoopBegin,
+  LoopExit,
+  LoopNoEnd,
+  LoopBreak,
+  LoopContinue,
+  LoopBackEdge,
+};
+
+} // namespace llvm

+ 2604 - 0
projects/dxilconv/include/ShaderBinary/ShaderBinary.h

@@ -0,0 +1,2604 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// ShaderBinary.h                                                          //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Vertex shader binary format parsing and encoding.                         //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+
+// Depends on D3D10TokenizedProgramFormat.hpp (e.g. for D3D10_SB_PRIMITIVE); include that header before this one.
+
+typedef UINT CShaderToken;
+
+
+//*****************************************************************************
+//
+// GetNumVertices
+//
+// Returns the number of vertices in a complete primitive of type PrimType:
+// N for an N-control-point patch, 0 for any value not listed in the switch.
+//*****************************************************************************
+inline UINT GetNumVertices( D3D10_SB_PRIMITIVE PrimType )
+{
+    switch( PrimType )
+    {
+    case D3D10_SB_PRIMITIVE_POINT: return 1;
+    case D3D10_SB_PRIMITIVE_LINE: return 2;
+    case D3D10_SB_PRIMITIVE_TRIANGLE: return 3;
+    case D3D10_SB_PRIMITIVE_LINE_ADJ: return 4;
+    case D3D10_SB_PRIMITIVE_TRIANGLE_ADJ: return 6;
+    case D3D11_SB_PRIMITIVE_1_CONTROL_POINT_PATCH: return 1;
+    case D3D11_SB_PRIMITIVE_2_CONTROL_POINT_PATCH: return 2;
+    case D3D11_SB_PRIMITIVE_3_CONTROL_POINT_PATCH: return 3;
+    case D3D11_SB_PRIMITIVE_4_CONTROL_POINT_PATCH: return 4;
+    case D3D11_SB_PRIMITIVE_5_CONTROL_POINT_PATCH: return 5;
+    case D3D11_SB_PRIMITIVE_6_CONTROL_POINT_PATCH: return 6;
+    case D3D11_SB_PRIMITIVE_7_CONTROL_POINT_PATCH: return 7;
+    case D3D11_SB_PRIMITIVE_8_CONTROL_POINT_PATCH: return 8;
+    case D3D11_SB_PRIMITIVE_9_CONTROL_POINT_PATCH: return 9;
+    case D3D11_SB_PRIMITIVE_10_CONTROL_POINT_PATCH: return 10;
+    case D3D11_SB_PRIMITIVE_11_CONTROL_POINT_PATCH: return 11;
+    case D3D11_SB_PRIMITIVE_12_CONTROL_POINT_PATCH: return 12;
+    case D3D11_SB_PRIMITIVE_13_CONTROL_POINT_PATCH: return 13;
+    case D3D11_SB_PRIMITIVE_14_CONTROL_POINT_PATCH: return 14;
+    case D3D11_SB_PRIMITIVE_15_CONTROL_POINT_PATCH: return 15;
+    case D3D11_SB_PRIMITIVE_16_CONTROL_POINT_PATCH: return 16;
+    case D3D11_SB_PRIMITIVE_17_CONTROL_POINT_PATCH: return 17;
+    case D3D11_SB_PRIMITIVE_18_CONTROL_POINT_PATCH: return 18;
+    case D3D11_SB_PRIMITIVE_19_CONTROL_POINT_PATCH: return 19;
+    case D3D11_SB_PRIMITIVE_20_CONTROL_POINT_PATCH: return 20;
+    case D3D11_SB_PRIMITIVE_21_CONTROL_POINT_PATCH: return 21;
+    case D3D11_SB_PRIMITIVE_22_CONTROL_POINT_PATCH: return 22;
+    case D3D11_SB_PRIMITIVE_23_CONTROL_POINT_PATCH: return 23;
+    case D3D11_SB_PRIMITIVE_24_CONTROL_POINT_PATCH: return 24;
+    case D3D11_SB_PRIMITIVE_25_CONTROL_POINT_PATCH: return 25;
+    case D3D11_SB_PRIMITIVE_26_CONTROL_POINT_PATCH: return 26;
+    case D3D11_SB_PRIMITIVE_27_CONTROL_POINT_PATCH: return 27;
+    case D3D11_SB_PRIMITIVE_28_CONTROL_POINT_PATCH: return 28;
+    case D3D11_SB_PRIMITIVE_29_CONTROL_POINT_PATCH: return 29;
+    case D3D11_SB_PRIMITIVE_30_CONTROL_POINT_PATCH: return 30;
+    case D3D11_SB_PRIMITIVE_31_CONTROL_POINT_PATCH: return 31;
+    case D3D11_SB_PRIMITIVE_32_CONTROL_POINT_PATCH: return 32;
+    default: return 0; // unlisted primitive type: callers see a vertex count of 0
+    }
+}
+
+
+/*==========================================================================;
+ *
+ *  D3D10ShaderBinary namespace
+ *
+ *  File:       ShaderBinary.h
+ *  Content:    Vertex shader assembler support
+ *
+ ***************************************************************************/
+
+namespace D3D10ShaderBinary
+{
+
+const UINT MAX_INSTRUCTION_LENGTH       = 128; // NOTE(review): unit not stated here (presumably tokens); confirm
+const UINT D3D10_SB_MAX_INSTRUCTION_OPERANDS = 8;
+const UINT D3D11_SB_MAX_CALL_OPERANDS = 0x10000;
+const UINT D3D11_SB_MAX_NUM_TYPES = 0x10000;
+
+typedef enum D3D10_SB_OPCODE_CLASS // category of an opcode; stored in CInstructionInfo::m_OpClass
+{
+    D3D10_SB_FLOAT_OP,
+    D3D10_SB_INT_OP,
+    D3D10_SB_UINT_OP,
+    D3D10_SB_BIT_OP,
+    D3D10_SB_FLOW_OP,
+    D3D10_SB_TEX_OP,
+    D3D10_SB_DCL_OP,
+    D3D11_SB_ATOMIC_OP,
+    D3D11_SB_MEM_OP,
+    D3D11_SB_DOUBLE_OP,
+    D3D11_SB_FLOAT_TO_DOUBLE_OP,
+    D3D11_SB_DOUBLE_TO_FLOAT_OP,
+    D3D11_SB_DEBUG_OP,
+} D3D10_SB_OPCODE_CLASS;
+
+// One entry of the per-opcode information table (g_InstructionInfo): the
+// opcode's name, operand count, opcode class and a BYTE mask called
+// InPrecisionFromOutMask.
+struct CInstructionInfo
+{
+    // Fills in every field of this entry.
+    //   NumOperands            - number of operands the instruction takes
+    //   Name                   - name string, copied into the fixed-size m_Name
+    //                            buffer (bounded by the buffer size; the result
+    //                            of the copy is not checked)
+    //   OpClass                - opcode category
+    //   InPrecisionFromOutMask - stored as-is; its meaning is not visible here
+    void Set(BYTE NumOperands, LPCSTR Name, D3D10_SB_OPCODE_CLASS OpClass,
+             BYTE InPrecisionFromOutMask)
+    {
+        StringCchCopyA(m_Name, sizeof(m_Name), Name);
+        m_NumOperands            = NumOperands;
+        m_InPrecisionFromOutMask = InPrecisionFromOutMask;
+        m_OpClass                = OpClass;
+    }
+
+    char                  m_Name[64];
+    BYTE                  m_NumOperands;
+    BYTE                  m_InPrecisionFromOutMask;
+    D3D10_SB_OPCODE_CLASS m_OpClass;
+};
+
+
+extern CInstructionInfo g_InstructionInfo[D3D10_SB_NUM_OPCODES];
+
+UINT __stdcall GetNumInstructionOperands(D3D10_SB_OPCODE_TYPE OpCode);
+void __stdcall InitInstructionInfo();
+
+//*****************************************************************************
+//
+// class COperandIndex
+//
+// Represents a dimension index of an operand
+//
+//*****************************************************************************
+
+// One dimension's index of an operand: either an immediate value, or a
+// reference to another ("relative") register, optionally plus an immediate.
+class COperandIndex
+{
+public:
+    // Zero the whole object so a stand-alone index never exposes indeterminate
+    // values. This yields m_bExtendedOperand == FALSE as before, and leaves the
+    // remaining fields in the same all-zero state that COperandBase::Clear()
+    // gives to the indices it embeds.
+    COperandIndex() { memset(this, 0, sizeof(*this)); }
+    // Value for the immediate index type
+    union
+    {
+        UINT        m_RegIndex;
+        UINT        m_RegIndexA[2];
+        INT64       m_RegIndex64;
+    };
+    // Data for the relative index type
+    D3D10_SB_OPERAND_TYPE    m_RelRegType;
+    D3D10_SB_4_COMPONENT_NAME m_ComponentName;
+    D3D10_SB_OPERAND_INDEX_DIMENSION         m_IndexDimension;
+
+    // Extended-operand state; updated by the two setters below.
+    BOOL                                     m_bExtendedOperand;
+    D3D11_SB_OPERAND_MIN_PRECISION           m_MinPrecision;
+    BOOL                                     m_Nonuniform;
+    D3D10_SB_EXTENDED_OPERAND_TYPE           m_ExtendedOperandType;
+
+    // First index of the relative register
+    union
+    {
+        UINT        m_RelIndex;
+        UINT        m_RelIndexA[2];
+        INT64       m_RelIndex64;
+    };
+    // Second index of the relative register
+    union
+    {
+        UINT        m_RelIndex1;
+        UINT        m_RelIndexA1[2];
+        INT64       m_RelIndex641;
+    };
+
+    // Records the minimum precision. Any non-default precision also marks the
+    // index as carrying an extended operand of the "modifier" type; the default
+    // precision leaves the extended-operand fields untouched.
+    void SetMinPrecision(D3D11_SB_OPERAND_MIN_PRECISION MinPrec)
+    {
+        m_MinPrecision = MinPrec;
+        if( MinPrec != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT )
+        {
+            m_bExtendedOperand = true;
+            m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision
+        }
+    }
+
+    // Records the non-uniform flag. Setting it to true also marks the index as
+    // carrying an extended operand of the "modifier" type; setting it to false
+    // does not clear a previously set extended-operand flag.
+    void SetNonuniformIndex(bool bNonuniform = false)
+    {
+        m_Nonuniform = bNonuniform;
+        if(bNonuniform)
+        {
+            m_bExtendedOperand = true;
+            m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER;
+        }
+    }
+};
+
+
+enum MinPrecQuantizeFunctionIndex // Used by reference rasterizer (IHVs can ignore)
+{
+    MinPrecFuncDefault = 0, // zero, so the memset in COperandBase::Clear() selects it
+    MinPrecFunc2_8,
+    MinPrecFunc16,
+    MinPrecFuncUint16,
+    MinPrecFuncInt16,
+};
+
+//*****************************************************************************
+//
+// class COperandBase
+//
+// A base class for shader instruction operands
+//
+//*****************************************************************************
+
+// Common state and helpers for shader instruction operands: operand type,
+// up to three per-dimension indices, component selection (mask / swizzle /
+// single component), modifier and precision data, and an immediate value.
+class COperandBase
+{
+public:
+    // Starts with every field zeroed (see Clear()).
+    COperandBase() {Clear();}
+    // Bitwise copy of the whole operand, including its per-dimension indices.
+    COperandBase(const COperandBase & Op) { memcpy(this, &Op, sizeof(*this)); }
+    D3D10_SB_OPERAND_TYPE OperandType() const {return m_Type;}
+    // Index must be 0..2; it is not range-checked.
+    const COperandIndex* OperandIndex(UINT Index) const {return &m_Index[Index];}
+    D3D10_SB_OPERAND_INDEX_REPRESENTATION OperandIndexType(UINT Index) const {return m_IndexType[Index];}
+    D3D10_SB_OPERAND_INDEX_DIMENSION OperandIndexDimension() const {return m_IndexDimension;}
+    D3D10_SB_OPERAND_NUM_COMPONENTS NumComponents() const {return m_NumComponents;}
+    // Get the register index for a given dimension
+    UINT RegIndex(UINT Dimension = 0) const {return m_Index[Dimension].m_RegIndex;}
+    // Get the register index from the lowest dimension
+    // (any dimensionality other than 2D or 3D reads dimension 0).
+    UINT RegIndexForMinorDimension() const
+    {
+        switch (m_IndexDimension)
+        {
+            default:
+            case D3D10_SB_OPERAND_INDEX_1D:
+                return RegIndex(0);
+            case D3D10_SB_OPERAND_INDEX_2D:
+                return RegIndex(1);
+            case D3D10_SB_OPERAND_INDEX_3D:
+                return RegIndex(2);
+        }
+    }
+    // Get the write mask (shares storage with the swizzle bytes)
+    UINT WriteMask() const {return m_WriteMask;}
+    // Get the swizzle
+    UINT SwizzleComponent(UINT index) const {return m_Swizzle[index];}
+    // Get immediate 32 bit value
+    UINT Imm32() const {return m_Value[0];}
+    // Stores the modifier; any modifier other than NONE also marks the operand
+    // as carrying an extended operand of the "modifier" type.
+    void SetModifier(D3D10_SB_OPERAND_MODIFIER Modifier)
+    {
+        m_Modifier = Modifier;
+        if (Modifier != D3D10_SB_OPERAND_MODIFIER_NONE)
+        {
+            m_bExtendedOperand = true;
+            m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER;
+        }
+    }
+    // Stores the minimum precision; a non-default precision also marks the
+    // operand as extended.
+    void SetMinPrecision(D3D11_SB_OPERAND_MIN_PRECISION MinPrec)
+    {
+        m_MinPrecision = MinPrec;
+        if( m_MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT )
+        {
+            m_bExtendedOperand = true;
+            m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // reusing same extended operand token as modifiers.
+        }
+    }
+    // Stores the non-uniform flag; setting it to true also marks the operand
+    // as extended.
+    void SetNonuniform(bool bNonuniform = false)
+    {
+        m_Nonuniform = bNonuniform;
+        if(bNonuniform)
+        {
+            m_bExtendedOperand = true;
+            m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER;
+        }
+    }
+    D3D10_SB_OPERAND_MODIFIER Modifier() const {return m_Modifier;}
+    // Switches to swizzle mode; with no arguments the swizzle is X, Y, Z, W.
+    void SetSwizzle(BYTE SwizzleX=D3D10_SB_4_COMPONENT_X,
+                    BYTE SwizzleY=D3D10_SB_4_COMPONENT_Y,
+                    BYTE SwizzleZ=D3D10_SB_4_COMPONENT_Z,
+                    BYTE SwizzleW=D3D10_SB_4_COMPONENT_W)
+    {
+        m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE;
+        m_Swizzle[0] = SwizzleX;
+        m_Swizzle[1] = SwizzleY;
+        m_Swizzle[2] = SwizzleZ;
+        m_Swizzle[3] = SwizzleW;
+    }
+    // Switches to single-component-select mode.
+    void SelectComponent(D3D10_SB_4_COMPONENT_NAME ComponentName=D3D10_SB_4_COMPONENT_X)
+    {
+        m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE;
+        m_ComponentName = ComponentName;
+    }
+    // Switches to write-mask mode. The mask shares storage with the swizzle.
+    void SetMask(UINT Mask=D3D10_SB_OPERAND_4_COMPONENT_MASK_ALL)
+    {
+        m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE;
+        m_WriteMask = Mask;
+    }
+    // Sets dimension Dim (0..2, not range-checked) to an immediate 32-bit index.
+    void SetIndex(UINT Dim, UINT Imm32)
+    {
+        m_IndexType[Dim] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32;
+        m_Index[Dim].m_RegIndex = Imm32;
+    }
+    // Sets dimension Dim (0..2, not range-checked) to a relative index: the
+    // value of a component of another register, optionally plus an immediate
+    // Offset.
+    void SetIndex(  UINT Dim,
+                    UINT Offset,
+                    D3D10_SB_OPERAND_TYPE RelRegType,
+                    UINT RelRegIndex0,
+                    UINT RelRegIndex1,
+                    D3D10_SB_4_COMPONENT_NAME RelComponentName,
+                    D3D11_SB_OPERAND_MIN_PRECISION RelRegMinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // A zero offset is encoded as purely relative, anything else as
+        // immediate-plus-relative.
+        if (Offset == 0)
+            m_IndexType[Dim] = D3D10_SB_OPERAND_INDEX_RELATIVE;
+        else
+            m_IndexType[Dim] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
+        m_Index[Dim].m_RegIndex = Offset;                   // immediate offset, such as the 3 in cb0[x1[2].x + 3] or cb0[r1.x + 3]
+        m_Index[Dim].m_RelRegType = RelRegType;
+        // Indexable temps (x#[#]) need two indices; other register types one.
+        if( RelRegType == D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP )
+            m_Index[Dim].m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D;
+        else
+            m_Index[Dim].m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D;
+        m_Index[Dim].m_RelIndex = RelRegIndex0;             // relative register index, such as the 1 in cb0[x1[2].x + 3] or cb0[r1.x + 3]
+        m_Index[Dim].m_RelIndex1 = RelRegIndex1;            // relative register second dimension index, such as the 2 in cb0[x1[2].x + 3]
+        m_Index[Dim].m_ComponentName = RelComponentName;
+        m_Index[Dim].SetMinPrecision(RelRegMinPrecision);
+    }
+
+public:  // Data members are public on purpose: callers access them (and the unions) directly.
+    // Zeroes every field of the operand, including the embedded indices.
+    void Clear()
+    {
+        memset(this, 0, sizeof(*this));
+    }
+    MinPrecQuantizeFunctionIndex                 m_MinPrecQuantizeFunctionIndex; // used by ref for low precision (IHVs can ignore)
+    D3D10_SB_OPERAND_TYPE                        m_Type;
+    COperandIndex                                m_Index[3];
+    D3D10_SB_OPERAND_NUM_COMPONENTS              m_NumComponents;
+    D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE  m_ComponentSelection;
+    BOOL                                         m_bExtendedOperand;
+    D3D10_SB_OPERAND_MODIFIER                    m_Modifier;
+    D3D11_SB_OPERAND_MIN_PRECISION               m_MinPrecision;
+    BOOL                                         m_Nonuniform;
+    D3D10_SB_EXTENDED_OPERAND_TYPE               m_ExtendedOperandType;
+    // Write mask and swizzle share storage; m_ComponentSelection says which applies.
+    union
+    {
+        UINT                   m_WriteMask;
+        BYTE                    m_Swizzle[4];
+    };
+    D3D10_SB_4_COMPONENT_NAME    m_ComponentName;
+    // Immediate value, viewed as 32-bit or 64-bit integers or floating point.
+    union
+    {
+        UINT                                m_Value[4];
+        float                               m_Valuef[4];
+        INT64                               m_Value64[2];
+        double                              m_Valued[2];
+    };
+    struct
+    {
+        D3D10_SB_OPERAND_INDEX_REPRESENTATION    m_IndexType[3];
+        D3D10_SB_OPERAND_INDEX_DIMENSION         m_IndexDimension;
+#pragma warning(suppress: 4201) // Warning about nameless structure.
+    };
+
+    friend class CShaderAsm;
+    friend class CShaderCodeParser;
+    friend class CInstruction;
+    friend class COperand;
+    friend class COperandDst;
+};
+
+//*****************************************************************************
+//
+// class COperand
+//
+// Encapsulates a source operand in shader instructions
+//
+//*****************************************************************************
+
+class COperand: public COperandBase
+{
+public:
+    COperand(): COperandBase() {} // every field zeroed by COperandBase()
+    COperand(UINT Imm32): COperandBase() // single-component 32-bit immediate (unsigned)
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        m_WriteMask = 0;
+        m_Type = D3D10_SB_OPERAND_TYPE_IMMEDIATE32;
+        m_bExtendedOperand = FALSE;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_0D;
+        m_Value[0] = Imm32;
+        m_NumComponents = D3D10_SB_OPERAND_1_COMPONENT;
+    }
+    COperand(int Imm32): COperandBase() // single-component 32-bit immediate (signed input)
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        m_WriteMask = 0;
+        m_Type = D3D10_SB_OPERAND_TYPE_IMMEDIATE32;
+        m_bExtendedOperand = FALSE;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_0D;
+        m_Value[0] = Imm32; // stored in the UINT view of the value union
+        m_NumComponents = D3D10_SB_OPERAND_1_COMPONENT;
+    }
+    COperand(float Imm32): COperandBase() // single-component 32-bit immediate (float)
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        m_WriteMask = 0;
+        m_Type = D3D10_SB_OPERAND_TYPE_IMMEDIATE32;
+        m_bExtendedOperand = FALSE;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_0D;
+        m_Valuef[0] = Imm32;
+        m_NumComponents = D3D10_SB_OPERAND_1_COMPONENT;
+    }
+    COperand(INT64 Imm64): COperandBase() // single-component 64-bit immediate
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        m_WriteMask = 0;
+        m_Type = D3D10_SB_OPERAND_TYPE_IMMEDIATE64;
+        m_bExtendedOperand = FALSE;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_0D;
+        m_Value64[0] = Imm64;
+        m_NumComponents = D3D10_SB_OPERAND_1_COMPONENT;
+    }
+    COperand(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex, // 0-component operand with one immediate index
+        D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+        : COperandBase()
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        m_Type = Type;
+        SetMinPrecision(MinPrecision);
+        m_NumComponents = D3D10_SB_OPERAND_0_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D;
+        SetIndex(0, RegIndex);
+    }
+    // Immediate constant: four floats, default swizzle (X, Y, Z, W)
+    COperand(float v1, float v2, float v3, float v4): COperandBase()
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        SetSwizzle();
+        m_Type = D3D10_SB_OPERAND_TYPE_IMMEDIATE32;
+        m_bExtendedOperand = FALSE;
+        m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_0D;
+        m_Valuef[0] = v1;
+        m_Valuef[1] = v2;
+        m_Valuef[2] = v3;
+        m_Valuef[3] = v4;
+    }
+    // Immediate constant: two doubles (64-bit immediate), default swizzle
+    COperand(double v1, double v2): COperandBase()
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        SetSwizzle();
+        m_Type = D3D10_SB_OPERAND_TYPE_IMMEDIATE64;
+        m_bExtendedOperand = FALSE;
+        m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_0D;
+        m_Valued[0] = v1;
+        m_Valued[1] = v2;
+    }
+    // Immediate constant: four floats with an explicit swizzle
+    COperand(float v1, float v2, float v3, float v4,
+             BYTE SwizzleX, BYTE SwizzleY, BYTE SwizzleZ, BYTE SwizzleW): COperandBase()
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        SetSwizzle(SwizzleX, SwizzleY, SwizzleZ, SwizzleW);
+        m_Type = D3D10_SB_OPERAND_TYPE_IMMEDIATE32;
+        m_bExtendedOperand = FALSE;
+        m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_0D;
+        m_Valuef[0] = v1;
+        m_Valuef[1] = v2;
+        m_Valuef[2] = v3;
+        m_Valuef[3] = v4;
+    }
+
+    // Immediate constant: four ints, default swizzle (X, Y, Z, W)
+    COperand(int v1, int v2, int v3, int v4): COperandBase()
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        SetSwizzle();
+        m_Type = D3D10_SB_OPERAND_TYPE_IMMEDIATE32;
+        m_bExtendedOperand = FALSE;
+        m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_0D;
+        m_Value[0] = v1;
+        m_Value[1] = v2;
+        m_Value[2] = v3;
+        m_Value[3] = v4;
+    }
+    // Immediate constant: four ints with an explicit swizzle
+    COperand(int v1, int v2, int v3, int v4,
+             BYTE SwizzleX, BYTE SwizzleY, BYTE SwizzleZ, BYTE SwizzleW): COperandBase()
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        SetSwizzle(SwizzleX, SwizzleY, SwizzleZ, SwizzleW);
+        m_Type = D3D10_SB_OPERAND_TYPE_IMMEDIATE32;
+        m_bExtendedOperand = FALSE;
+        m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_0D;
+        m_Value[0] = v1;
+        m_Value[1] = v2;
+        m_Value[2] = v3;
+        m_Value[3] = v4;
+    }
+    COperand(INT64 v1, INT64 v2): COperandBase() // immediate constant: two 64-bit ints, default swizzle
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        SetSwizzle();
+        m_Type = D3D10_SB_OPERAND_TYPE_IMMEDIATE64;
+        m_bExtendedOperand = FALSE;
+        m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_0D;
+        m_Value64[0] = v1;
+        m_Value64[1] = v2;
+    }
+
+    COperand(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex, // 4-component operand, one immediate index, explicit swizzle
+             BYTE SwizzleX, BYTE SwizzleY, BYTE SwizzleZ, BYTE SwizzleW,
+             D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase()
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        SetSwizzle(SwizzleX, SwizzleY, SwizzleZ, SwizzleW);
+        m_Type = Type;
+        m_bExtendedOperand = FALSE; // must precede SetMinPrecision, which may set it
+        SetMinPrecision(MinPrecision);
+        m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D;
+        SetIndex(0, RegIndex);
+    }
+
+    // Used for operands without indices; the component count depends on Type
+    COperand(D3D10_SB_OPERAND_TYPE Type,
+        D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase()
+    {
+        m_Type = Type;
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        m_bExtendedOperand = FALSE;
+        SetMinPrecision(MinPrecision);
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_0D;
+        if( (Type == D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID) || // these types are 1-component
+            (Type == D3D11_SB_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) ||
+            (Type == D3D11_SB_OPERAND_TYPE_INPUT_COVERAGE_MASK) ||
+            (Type == D3D11_SB_OPERAND_TYPE_INNER_COVERAGE) ||
+            (Type == D3D11_SB_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED) ||
+            (Type == D3D11_SB_OPERAND_TYPE_INPUT_GS_INSTANCE_ID) )
+        {
+            m_NumComponents = D3D10_SB_OPERAND_1_COMPONENT;
+        }
+        else if( (Type == D3D11_SB_OPERAND_TYPE_INPUT_DOMAIN_POINT) || // these types are 4-component
+                 (Type == D3D11_SB_OPERAND_TYPE_INPUT_THREAD_ID) ||
+                 (Type == D3D11_SB_OPERAND_TYPE_INPUT_THREAD_GROUP_ID) ||
+                 (Type == D3D11_SB_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP) ||
+                 (Type == D3D11_SB_OPERAND_TYPE_CYCLE_COUNTER) )
+        {
+            m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        }
+        else // every other type is 0-component
+        {
+            m_NumComponents = D3D10_SB_OPERAND_0_COMPONENT;
+        }
+    }
+
+    // source operand with relative addressing (0-component, one index dimension)
+    COperand(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex,
+             D3D10_SB_OPERAND_TYPE RelRegType, UINT RelRegIndex, D3D10_SB_4_COMPONENT_NAME RelComponentName,
+             D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT,
+             D3D11_SB_OPERAND_MIN_PRECISION RelRegMinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase()
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        m_Type = Type;
+        m_bExtendedOperand = FALSE;
+        SetMinPrecision(MinPrecision);
+        m_NumComponents = D3D10_SB_OPERAND_0_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D;
+        SetIndex(0, RegIndex, RelRegType, RelRegIndex, 0xFFFFFFFF, RelComponentName, RelRegMinPrecision); // NOTE(review): 0xFFFFFFFF is presumably a "no second index" marker; confirm
+    }
+
+
+    friend class CShaderAsm;
+    friend class CShaderCodeParser;
+    friend class CInstruction;
+};
+
+//*****************************************************************************
+//
+// class COperand4
+//
+// Encapsulates a source operand with 4 components in shader instructions
+//
+//*****************************************************************************
+
+class COperand4: public COperandBase
+{
+public:
+    COperand4(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex, // one immediate index, default swizzle (X, Y, Z, W)
+        D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase()
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        SetSwizzle();
+        m_Type = Type;
+        m_bExtendedOperand = FALSE; // must precede SetMinPrecision, which may set it
+        SetMinPrecision(MinPrecision);
+        m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D;
+        SetIndex(0, RegIndex);
+    }
+    COperand4(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex, D3D10_SB_4_COMPONENT_NAME Component, // one immediate index, single component selected
+        D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase()
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE;
+        m_ComponentName = Component;
+        m_Type = Type;
+        m_bExtendedOperand = FALSE;
+        SetMinPrecision(MinPrecision);
+        m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D;
+        SetIndex(0, RegIndex);
+    }
+    // single component select on reg, 1D indexing on address
+    COperand4(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex, D3D10_SB_4_COMPONENT_NAME Component,
+             D3D10_SB_OPERAND_TYPE RelRegType, UINT RelRegIndex, D3D10_SB_4_COMPONENT_NAME RelComponentName,
+             D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT,
+             D3D11_SB_OPERAND_MIN_PRECISION RelRegMinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase()
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE;
+        m_ComponentName = Component;
+        m_Type = Type;
+        m_bExtendedOperand = FALSE;
+        SetMinPrecision(MinPrecision);
+        m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D;
+        SetIndex(0, RegIndex, RelRegType, RelRegIndex, 0xFFFFFFFF, RelComponentName, RelRegMinPrecision); // NOTE(review): 0xFFFFFFFF is presumably a "no second index" marker; confirm
+    }
+    // 4-component source operand with relative addressing (one relative register index, default swizzle)
+    COperand4(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex,
+             D3D10_SB_OPERAND_TYPE RelRegType, UINT RelRegIndex, D3D10_SB_4_COMPONENT_NAME RelComponentName,
+             D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT,
+             D3D11_SB_OPERAND_MIN_PRECISION RelRegMinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase()
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        SetSwizzle();
+        m_Type = Type;
+        m_bExtendedOperand = FALSE;
+        SetMinPrecision(MinPrecision);
+        m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D;
+        SetIndex(0, RegIndex, RelRegType, RelRegIndex, 0xFFFFFFFF, RelComponentName, RelRegMinPrecision);
+    }
+    // 4-component source operand with relative addressing (two relative register indices, default swizzle)
+    COperand4(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex,
+        D3D10_SB_OPERAND_TYPE RelRegType, UINT RelRegIndex, UINT RelRegIndex1, D3D10_SB_4_COMPONENT_NAME RelComponentName,
+        D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT,
+        D3D11_SB_OPERAND_MIN_PRECISION RelRegMinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase()
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        SetSwizzle();
+        m_Type = Type;
+        m_bExtendedOperand = FALSE;
+        SetMinPrecision(MinPrecision);
+        m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D;
+        SetIndex(0, RegIndex, RelRegType, RelRegIndex, RelRegIndex1, RelComponentName, RelRegMinPrecision);
+    }
+    COperand4(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex, // one immediate index, explicit swizzle
+             BYTE SwizzleX, BYTE SwizzleY, BYTE SwizzleZ, BYTE SwizzleW,
+             D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase()
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        SetSwizzle(SwizzleX, SwizzleY, SwizzleZ, SwizzleW);
+        m_Type = Type;
+        m_bExtendedOperand = FALSE;
+        SetMinPrecision(MinPrecision);
+        m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D;
+        SetIndex(0, RegIndex);
+    }
+    // 4-component source operand with relative addressing (explicit swizzle, two relative register indices)
+    COperand4(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex,
+             BYTE SwizzleX, BYTE SwizzleY, BYTE SwizzleZ, BYTE SwizzleW,
+             D3D10_SB_OPERAND_TYPE RelRegType, UINT RelRegIndex, UINT RelRegIndex1,
+             D3D10_SB_4_COMPONENT_NAME RelComponentName,
+             D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT,
+             D3D11_SB_OPERAND_MIN_PRECISION RelRegMinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase()
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        SetSwizzle(SwizzleX, SwizzleY, SwizzleZ, SwizzleW);
+        m_Type = Type;
+        m_bExtendedOperand = FALSE;
+        SetMinPrecision(MinPrecision);
+        m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D;
+        SetIndex(0, RegIndex, RelRegType, RelRegIndex, RelRegIndex1, RelComponentName, RelRegMinPrecision);
+    }
+
+    friend class CShaderAsm;
+    friend class CShaderCodeParser;
+    friend class CInstruction;
+};
+//*****************************************************************************
+//
+// class COperandDst
+//
+// Encapsulates a destination operand in shader instructions
+//
+//*****************************************************************************
+
+// Destination operand builder. Each constructor starts from COperandBase(),
+// clears the operand modifier and the extended-operand flag, and then records
+// the register type, write mask, min-precision and index layout for one
+// addressing form.
+//
+// NOTE(review): SetMask, SetMinPrecision and SetIndex are defined outside this
+// section; the statement order inside each constructor (in particular the
+// m_bExtendedOperand reset before SetMinPrecision) should be kept as is unless
+// those helpers are confirmed not to depend on it.
+class COperandDst: public COperandBase
+{
+public:
+    // 1D-indexed, 4-component destination. SetMask() is called without an
+    // argument, so the mask is whatever that helper uses by default.
+    COperandDst(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex,
+        D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase()
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        SetMask();
+        m_Type = Type;
+        m_bExtendedOperand = FALSE;
+        SetMinPrecision(MinPrecision);
+        m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D;
+        SetIndex(0, RegIndex);
+    }
+    // 1D-indexed, 4-component destination with an explicit write mask.
+    COperandDst(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex, UINT WriteMask,
+        D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase()
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        SetMask(WriteMask);
+        m_Type = Type;
+        m_bExtendedOperand = FALSE;
+        SetMinPrecision(MinPrecision);
+        m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D;
+        SetIndex(0, RegIndex);
+    }
+    // 1D-indexed, 4-component destination whose single index is relative:
+    // RegIndex plus the relative-register description (RelRegType, RelRegIndex,
+    // RelRegIndex1, RelComponentName, RelRegMinPrecision) is forwarded to
+    // SetIndex for dimension 0.
+    COperandDst(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex, UINT WriteMask,
+         D3D10_SB_OPERAND_TYPE RelRegType,
+         UINT RelRegIndex, UINT RelRegIndex1,
+         D3D10_SB_4_COMPONENT_NAME RelComponentName,
+         D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT,
+         D3D11_SB_OPERAND_MIN_PRECISION RelRegMinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+         :COperandBase()
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        SetMask(WriteMask);
+        m_Type = Type;
+        m_bExtendedOperand = FALSE;
+        SetMinPrecision(MinPrecision);
+        m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D;
+        SetIndex(0, RegIndex, RelRegType, RelRegIndex, RelRegIndex1, RelComponentName, RelRegMinPrecision);
+    }
+    // 2D-indexed, 4-component destination: index 0 is the immediate RegIndex,
+    // index 1 is relative. The unnamed UINT parameter is never read; it only
+    // distinguishes this overload from the 1D relative one above. The mask
+    // fields are written directly here instead of going through SetMask.
+    // NOTE(review): in the second SetIndex call RelRegIndex sits where the 1D
+    // overload passes RegIndex, RelRegIndex1 sits where it passes RelRegIndex,
+    // and the following argument is a literal 0 -- confirm against SetIndex's
+    // parameter list that this shift is intended.
+    COperandDst(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex, UINT WriteMask,
+                D3D10_SB_OPERAND_TYPE RelRegType, UINT RelRegIndex, UINT RelRegIndex1,
+                D3D10_SB_4_COMPONENT_NAME RelComponentName, UINT,
+                D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT,
+                D3D11_SB_OPERAND_MIN_PRECISION RelReg1MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) 
+                : COperandBase()
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE;
+        m_WriteMask = WriteMask;
+        m_Type = Type;
+        m_bExtendedOperand = FALSE;
+        SetMinPrecision(MinPrecision);
+        m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D;
+        SetIndex(0, RegIndex);
+        SetIndex(1, RelRegIndex, RelRegType, RelRegIndex1, 0, RelComponentName, RelReg1MinPrecision);
+    }
+    // 2D dst (e.g. for GS input decl): both indices are immediate values.
+    COperandDst(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex0, UINT RegIndex1,UINT WriteMask,
+        D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase()
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE;
+        m_WriteMask = WriteMask;
+        m_Type = Type;
+        m_bExtendedOperand = FALSE;
+        SetMinPrecision(MinPrecision);
+        m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D;
+        SetIndex(0, RegIndex0);
+        SetIndex(1, RegIndex1);
+    }
+    // Used for operands without indices. The depth and stencil-ref output
+    // types listed in the switch are 1-component; every other type is
+    // 0-component. No mask or component selection is assigned here.
+    COperandDst(D3D10_SB_OPERAND_TYPE Type,
+        D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase()
+    {
+        switch( Type )
+        {
+        case D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH:
+        case D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL:
+        case D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL:
+        case D3D11_SB_OPERAND_TYPE_OUTPUT_STENCIL_REF:
+            m_NumComponents = D3D10_SB_OPERAND_1_COMPONENT;
+            break;
+        default:
+            m_NumComponents = D3D10_SB_OPERAND_0_COMPONENT;
+            break;
+        }
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_0D;
+        m_Type = Type;
+        m_bExtendedOperand = FALSE;
+        SetMinPrecision(MinPrecision);
+    }
+    // Index-less (0D), 4-component destination with an explicit write mask.
+    COperandDst(UINT WriteMask, D3D10_SB_OPERAND_TYPE Type,
+        D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+        : COperandBase() // param order disambiguates from another constructor.
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE;
+        m_WriteMask = WriteMask;
+        m_Type = Type;
+        m_bExtendedOperand = FALSE;
+        SetMinPrecision(MinPrecision);
+        m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_0D;
+    }
+
+    friend class CShaderAsm;
+    friend class CShaderCodeParser;
+    friend class CInstruction;
+};
+
+//*****************************************************************************
+//
+// class COperand2D
+//
+// Encapsulates 2 dimensional source operand with 4 components in shader instructions
+//
+//*****************************************************************************
+
+// Two-index, 4-component source operand builder. Every constructor clears the
+// modifier and the extended-operand flag, sets the component selection
+// (swizzle or single component), and fills index dimensions 0 and 1 either
+// with immediate values or with a relative-register description.
+//
+// NOTE(review): SetSwizzle, SetMinPrecision and SetIndex are defined outside
+// this section; keep the statement order inside each constructor unless those
+// helpers are confirmed to be order-independent.
+class COperand2D: public COperandBase
+{
+public:
+    // Both indices immediate; SetSwizzle() is called without arguments, so the
+    // swizzle is whatever that helper uses by default.
+    COperand2D(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex0, UINT RegIndex1,
+        D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+        : COperandBase()
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        SetSwizzle();
+        m_Type = Type;
+        m_bExtendedOperand = FALSE;
+        SetMinPrecision(MinPrecision);
+        m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D;
+        SetIndex(0, RegIndex0);
+        SetIndex(1, RegIndex1);
+    }
+    // Both indices immediate; selects the single component named by Component
+    // (select-1 mode) instead of a swizzle.
+    COperand2D(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex0, UINT RegIndex1, D3D10_SB_4_COMPONENT_NAME Component,
+                D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+              : COperandBase()
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE;
+        m_ComponentName = Component;
+        m_Type = Type;
+        m_bExtendedOperand = FALSE;
+        SetMinPrecision(MinPrecision);
+        m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D;
+        SetIndex(0, RegIndex0);
+        SetIndex(1, RegIndex1);
+    }
+    // 2-dimensional 4-component operand with relative addressing the second index
+    // For example:
+    //      c2[x12[3].w + 7]
+    //  Type = c
+    //  RelRegType = x
+    //  RegIndex0 = 2
+    //  RegIndex1 = 7
+    //  RelRegIndex = 12
+    //  RelRegIndex1 = 3
+    //  RelComponentName = w
+    //
+    COperand2D(D3D10_SB_OPERAND_TYPE Type, 
+              UINT RegIndex0, 
+              UINT RegIndex1,
+              D3D10_SB_OPERAND_TYPE RelRegType, 
+              UINT RelRegIndex, 
+              UINT RelRegIndex1, 
+              D3D10_SB_4_COMPONENT_NAME RelComponentName,
+              D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT,
+              D3D11_SB_OPERAND_MIN_PRECISION RelReg1MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT )
+            : COperandBase()
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        SetSwizzle();
+        m_Type = Type;
+        m_bExtendedOperand = FALSE;
+        SetMinPrecision(MinPrecision);
+        m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D;
+        SetIndex(0, RegIndex0);
+        SetIndex(1, RegIndex1, RelRegType, RelRegIndex, RelRegIndex1, RelComponentName, RelReg1MinPrecision);
+    }
+    // 2-dimensional 4-component operand with relative addressing of the second
+    // index, where the relative register itself has a single index.
+    // For example:
+    //      c2[r12.y + 7]
+    //  Type = c
+    //  RelRegType = r
+    //  RegIndex0 = 2
+    //  RegIndex1 = 7
+    //  RelRegIndex = 12
+    //  RelComponentName = y
+    // This overload has no RelRegIndex1 parameter; 0 is passed to SetIndex in
+    // its place.
+    //
+    COperand2D(D3D10_SB_OPERAND_TYPE Type, 
+              UINT RegIndex0, 
+              UINT RegIndex1,
+              D3D10_SB_OPERAND_TYPE RelRegType, 
+              UINT RelRegIndex, 
+              D3D10_SB_4_COMPONENT_NAME RelComponentName,
+              D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT,
+              D3D11_SB_OPERAND_MIN_PRECISION RelReg1MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT )
+            : COperandBase()
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        SetSwizzle();
+        m_Type = Type;
+        m_bExtendedOperand = FALSE;
+        SetMinPrecision(MinPrecision);
+        m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D;
+        SetIndex(0, RegIndex0);
+        SetIndex(1, RegIndex1, RelRegType, RelRegIndex, 0, RelComponentName, RelReg1MinPrecision);
+    }
+    // 2-dimensional 4-component operand where either index may be relative.
+    // bIndexRelative0 / bIndexRelative1 choose, per dimension, between the
+    // relative form of SetIndex and the immediate form; when a flag is false
+    // the matching RelReg* arguments are ignored.
+    COperand2D(D3D10_SB_OPERAND_TYPE Type,
+              BOOL bIndexRelative0, BOOL bIndexRelative1,
+              UINT RegIndex0, UINT RegIndex1,
+              D3D10_SB_OPERAND_TYPE RelRegType0, UINT RelRegIndex0, UINT RelRegIndex10, D3D10_SB_4_COMPONENT_NAME RelComponentName0,
+              D3D10_SB_OPERAND_TYPE RelRegType1, UINT RelRegIndex1, UINT RelRegIndex11, D3D10_SB_4_COMPONENT_NAME RelComponentName1,
+              D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT,
+              D3D11_SB_OPERAND_MIN_PRECISION RelReg0MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT,
+              D3D11_SB_OPERAND_MIN_PRECISION RelReg1MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT )
+              : COperandBase()
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        SetSwizzle();
+        m_Type = Type;
+        m_bExtendedOperand = FALSE;
+        SetMinPrecision(MinPrecision);
+        m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D;
+        if (bIndexRelative0)
+            SetIndex(0, RegIndex0, RelRegType0, RelRegIndex0, RelRegIndex10, RelComponentName0, RelReg0MinPrecision);
+        else
+            SetIndex(0, RegIndex0);
+        if (bIndexRelative1)
+            SetIndex(1, RegIndex1, RelRegType1, RelRegIndex1, RelRegIndex11, RelComponentName1, RelReg1MinPrecision);
+        else
+            SetIndex(1, RegIndex1);
+    }
+    // Both indices immediate, with an explicit per-component swizzle.
+    COperand2D(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex0, UINT RegIndex1,
+              BYTE SwizzleX, BYTE SwizzleY, BYTE SwizzleZ, BYTE SwizzleW,
+              D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase()
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        SetSwizzle(SwizzleX, SwizzleY, SwizzleZ, SwizzleW);
+        m_Type = Type;
+        m_bExtendedOperand = FALSE;
+        SetMinPrecision(MinPrecision);
+        m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D;
+        SetIndex(0, RegIndex0);
+        SetIndex(1, RegIndex1);
+    }
+    // 2-dimensional 4-component operand with relative addressing and swizzle.
+    // Same per-dimension relative/immediate choice as the flag-driven overload
+    // above, but the swizzle is explicit and the arguments are grouped per
+    // dimension (RegIndexN followed by its RelReg* description).
+    COperand2D(D3D10_SB_OPERAND_TYPE Type,
+              BYTE SwizzleX, BYTE SwizzleY, BYTE SwizzleZ, BYTE SwizzleW,
+              BOOL bIndexRelative0, BOOL bIndexRelative1,
+              UINT RegIndex0, D3D10_SB_OPERAND_TYPE RelRegType0, UINT RelRegIndex0, UINT RelRegIndex10, D3D10_SB_4_COMPONENT_NAME RelComponentName0,
+              UINT RegIndex1, D3D10_SB_OPERAND_TYPE RelRegType1, UINT RelRegIndex1, UINT RelRegIndex11, D3D10_SB_4_COMPONENT_NAME RelComponentName1,
+              D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT,
+              D3D11_SB_OPERAND_MIN_PRECISION RelReg0MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT,
+              D3D11_SB_OPERAND_MIN_PRECISION RelReg1MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT )
+              : COperandBase()
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        SetSwizzle(SwizzleX, SwizzleY, SwizzleZ, SwizzleW);
+        m_Type = Type;
+        m_bExtendedOperand = FALSE;
+        SetMinPrecision(MinPrecision);
+        m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D;
+        if (bIndexRelative0)
+            SetIndex(0, RegIndex0, RelRegType0, RelRegIndex0, RelRegIndex10, RelComponentName0, RelReg0MinPrecision);
+        else
+            SetIndex(0, RegIndex0);
+
+        if (bIndexRelative1)
+            SetIndex(1, RegIndex1, RelRegType1, RelRegIndex1, RelRegIndex11, RelComponentName1, RelReg1MinPrecision);
+        else
+            SetIndex(1, RegIndex1);
+    }
+
+    friend class CShaderAsm;
+    friend class CShaderCodeParser;
+    friend class CInstruction;
+};
+
+// Three-index, 4-component operand builder with immediate indices only.
+// Within this section it is used by the CShaderAsm::EmitIndexable*Decl
+// functions, which pass (uTable, uTableLB, uTableUB) as the three indices.
+class COperand3D: public COperandBase
+{
+public:
+    // Clears the modifier and extended-operand flag, applies SetSwizzle()'s
+    // default, and stores RegIndex0..2 as index dimensions 0..2.
+    // NOTE(review): SetMinPrecision is defined outside this section; keep it
+    // after the m_bExtendedOperand reset unless confirmed order-independent.
+    COperand3D(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex0, UINT RegIndex1, UINT RegIndex2,
+        D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+        : COperandBase()
+    {
+        m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        SetSwizzle();
+        m_Type = Type;
+        m_bExtendedOperand = FALSE;
+        SetMinPrecision(MinPrecision);
+        m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT;
+        m_IndexDimension = D3D10_SB_OPERAND_INDEX_3D;
+        SetIndex(0, RegIndex0);
+        SetIndex(1, RegIndex1);
+        SetIndex(2, RegIndex2);
+    }
+
+    friend class CShaderAsm;
+    friend class CShaderCodeParser;
+    friend class CInstruction;
+};
+
+//*****************************************************************************
+//
+//  CInstruction
+//
+//*****************************************************************************
+
+// Structures for additional per-instruction fields unioned in CInstruction.
+// These structures don't contain ALL info used by the particular instruction,
+// only additional info not already in CInstruction.  Some instructions don't
+// need such structures because CInstruction already has the correct data
+// fields.
+
+// Extra data for a global-flags declaration (CInstruction::m_GlobalFlagsDecl).
+struct CGlobalFlagsDecl
+{
+    UINT Flags; // NOTE(review): bit meanings are not defined in this section
+};
+
+// Extra data stored in CInstruction::m_InputDeclSIV.
+struct CInputSystemInterpretedValueDecl
+{
+    D3D10_SB_NAME  Name;
+};
+
+// Extra data stored in CInstruction::m_InputDeclSGV.
+struct CInputSystemGeneratedValueDecl
+{
+    D3D10_SB_NAME  Name;
+};
+
+// Extra data stored in CInstruction::m_InputPSDecl.
+struct CInputPSDecl
+{
+    D3D10_SB_INTERPOLATION_MODE InterpolationMode;
+};
+
+// Extra data stored in CInstruction::m_InputPSDeclSIV: a name plus an
+// interpolation mode.
+struct CInputPSSystemInterpretedValueDecl
+{
+    D3D10_SB_NAME  Name;
+    D3D10_SB_INTERPOLATION_MODE InterpolationMode;
+};
+
+// Extra data stored in CInstruction::m_InputPSDeclSGV: a name plus an
+// interpolation mode.
+struct CInputPSSystemGeneratedValueDecl
+{
+    D3D10_SB_NAME  Name;
+    D3D10_SB_INTERPOLATION_MODE InterpolationMode;
+};
+
+// Extra data stored in CInstruction::m_OutputDeclSIV.
+struct COutputSystemInterpretedValueDecl
+{
+    D3D10_SB_NAME  Name;
+};
+
+// Extra data stored in CInstruction::m_OutputDeclSGV.
+struct COutputSystemGeneratedValueDecl
+{
+    D3D10_SB_NAME  Name;
+};
+
+// Extra data stored in CInstruction::m_IndexRangeDecl.
+struct CIndexRangeDecl
+{
+    UINT    RegCount;
+};
+
+// Extra data stored in CInstruction::m_ResourceDecl.
+struct CResourceDecl
+{
+    D3D10_SB_RESOURCE_DIMENSION      Dimension;
+    // One return type per component.
+    // NOTE(review): presumably ordered x, y, z, w, matching the component
+    // numbering 0..3 used by CShaderAsm::EmitResourceDecl -- confirm.
+    D3D10_SB_RESOURCE_RETURN_TYPE    ReturnType[4];
+    UINT                             SampleCount;
+    UINT                             Space;
+};
+
+// Extra data stored in CInstruction::m_ConstantBufferDecl.
+struct CConstantBufferDecl
+{
+    D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN AccessPattern;
+    UINT Size; // NOTE(review): unit (bytes vs. registers) is not stated here
+    UINT Space;
+};
+
+// Extra data stored in CInstruction::m_OutputTopologyDecl.
+struct COutputTopologyDecl
+{
+    D3D10_SB_PRIMITIVE_TOPOLOGY    Topology;
+};
+
+// Extra data stored in CInstruction::m_InputPrimitiveDecl.
+struct CInputPrimitiveDecl
+{
+    D3D10_SB_PRIMITIVE             Primitive;
+};
+
+// Extra data stored in CInstruction::m_GSMaxOutputVertexCountDecl.
+struct CGSMaxOutputVertexCountDecl
+{
+    UINT    MaxOutputVertexCount;
+};
+
+// Extra data stored in CInstruction::m_GSInstanceCountDecl.
+struct CGSInstanceCountDecl
+{
+    UINT    InstanceCount;
+};
+
+// Extra data stored in CInstruction::m_SamplerDecl.
+struct CSamplerDecl
+{
+    D3D10_SB_SAMPLER_MODE          SamplerMode;
+    UINT Space;
+};
+
+// Extra data stored in CInstruction::m_StreamDecl.
+struct CStreamDecl
+{
+    UINT    Stream;
+};
+
+// Extra data stored in CInstruction::m_TempsDecl.
+struct CTempsDecl
+{
+    UINT    NumTemps;
+};
+
+// Extra data stored in CInstruction::m_IndexableTempDecl.
+struct CIndexableTempDecl
+{
+    UINT    IndexableTempNumber;
+    UINT    NumRegisters;
+    UINT    Mask; // .x, .xy, .xyz or .xyzw (D3D10_SB_OPERAND_4_COMPONENT_MASK_* )
+};
+
+// Extra data stored in CInstruction::m_InputControlPointCountDecl.
+struct CHSDSInputControlPointCountDecl
+{
+    UINT    InputControlPointCount;
+};
+
+// Extra data stored in CInstruction::m_OutputControlPointCountDecl.
+struct CHSOutputControlPointCountDecl
+{
+    UINT    OutputControlPointCount;
+};
+
+// Extra data stored in CInstruction::m_TessellatorDomainDecl.
+struct CTessellatorDomainDecl
+{
+    D3D11_SB_TESSELLATOR_DOMAIN TessellatorDomain;
+};
+
+// Extra data stored in CInstruction::m_TessellatorPartitioningDecl.
+struct CTessellatorPartitioningDecl
+{
+    D3D11_SB_TESSELLATOR_PARTITIONING TessellatorPartitioning;
+};
+
+// Extra data stored in CInstruction::m_TessellatorOutputPrimitiveDecl.
+struct CTessellatorOutputPrimitiveDecl
+{
+    D3D11_SB_TESSELLATOR_OUTPUT_PRIMITIVE TessellatorOutputPrimitive;
+};
+
+// Extra data stored in CInstruction::m_HSMaxTessFactorDecl.
+struct CHSMaxTessFactorDecl
+{
+    float MaxTessFactor;
+};
+
+// Extra data stored in CInstruction::m_HSForkPhaseInstanceCountDecl.
+struct CHSForkPhaseInstanceCountDecl
+{
+    UINT InstanceCount;
+};
+
+// Extra data stored in CInstruction::m_HSJoinPhaseInstanceCountDecl.
+struct CHSJoinPhaseInstanceCountDecl
+{
+    UINT InstanceCount;
+};
+
+// Shader-message payload embedded in CCustomData.
+struct CShaderMessage
+{
+    D3D11_SB_SHADER_MESSAGE_ID     MessageID;
+    D3D11_SB_SHADER_MESSAGE_FORMAT FormatStyle;
+    PCSTR                          pFormatString; // not freed by CInstruction::ClearAllocations
+    UINT                           NumOperands;
+    // Released with free() by CInstruction::ClearAllocations when the owning
+    // custom data has Type == D3D11_SB_CUSTOMDATA_SHADER_MESSAGE.
+    COperandBase*                  pOperands;
+};
+    
+// Extra data for a custom-data block (CInstruction::m_CustomData).
+struct CCustomData
+{
+    D3D10_SB_CUSTOMDATA_CLASS  Type;
+    UINT                    DataSizeInBytes;
+    // Released with free() by CInstruction::ClearAllocations when the
+    // instruction's opcode is D3D10_SB_OPCODE_CUSTOMDATA.
+    void*                   pData;
+
+    // Per-class payload; only the shader-message form is declared here.
+    union
+    {
+        CShaderMessage      ShaderMessage;
+    };
+};
+
+// Extra data stored in CInstruction::m_FunctionTableDecl.
+struct CFunctionTableDecl
+{
+    UINT                    FunctionTableNumber;
+    UINT                    TableLength;
+    // Released with free() by CInstruction::ClearAllocations when the opcode
+    // is D3D11_SB_OPCODE_DCL_FUNCTION_TABLE.
+    UINT*                   pFunctionIdentifiers;
+};
+
+// Extra data stored in CInstruction::m_InterfaceDecl.
+struct CInterfaceDecl
+{
+    WORD                    InterfaceNumber;
+    WORD                    ArrayLength;
+    UINT                    ExpectedTableSize;
+    UINT                    TableLength;
+    // Released with free() by CInstruction::ClearAllocations when the opcode
+    // is D3D11_SB_OPCODE_DCL_INTERFACE.
+    UINT*                   pFunctionTableIdentifiers;
+    bool                    bDynamicallyIndexed;
+};
+
+// Extra data stored in CInstruction::m_FunctionBodyDecl.
+struct CFunctionBodyDecl
+{
+    UINT FunctionBodyNumber;
+};
+
+// Extra data stored in CInstruction::m_InterfaceCall.
+struct CInterfaceCall
+{
+    UINT                                    FunctionIndex;
+    COperandBase*                           pInterfaceOperand; // not freed by CInstruction::ClearAllocations
+};
+
+// Extra data stored in CInstruction::m_ThreadGroupDecl: one value per axis.
+struct CThreadGroupDeclaration
+{
+    UINT    x;
+    UINT    y;
+    UINT    z;
+};
+
+// Extra data stored in CInstruction::m_TypedUAVDecl.
+struct CTypedUAVDeclaration
+{
+    D3D10_SB_RESOURCE_DIMENSION      Dimension;
+    D3D10_SB_RESOURCE_RETURN_TYPE    ReturnType[4]; // one return type per component
+    UINT                             Flags;
+    UINT                             Space;
+};
+
+// Extra data stored in CInstruction::m_StructuredUAVDecl.
+struct CStructuredUAVDeclaration
+{
+    UINT    ByteStride;
+    UINT    Flags;
+    UINT    Space;
+};
+
+// Extra data stored in CInstruction::m_RawUAVDecl.
+struct CRawUAVDeclaration
+{
+    UINT    Flags;
+    UINT    Space;
+};
+
+// Extra data stored in CInstruction::m_RawTGSMDecl.
+struct CRawTGSMDeclaration
+{
+    UINT    ByteCount;
+};
+
+// Extra data stored in CInstruction::m_StructuredTGSMDecl.
+struct CStructuredTGSMDeclaration
+{
+    UINT    StructByteStride;
+    UINT    StructCount;
+};
+
+// Extra data stored in CInstruction::m_RawSRVDecl.
+struct CRawSRVDeclaration
+{
+    UINT    Space;
+};
+
+// Extra data stored in CInstruction::m_StructuredSRVDecl.
+struct CStructuredSRVDeclaration
+{
+    UINT    ByteStride;
+    UINT    Space;
+};
+
+// Extra data stored in CInstruction::m_SyncFlags.
+struct CSyncFlags
+{
+    bool bThreadsInGroup;
+    bool bThreadGroupSharedMemory;
+    bool bUnorderedAccessViewMemoryGlobal;
+    bool bUnorderedAccessViewMemoryGroup; // exclusive to global
+};
+
+// Describes a single shader instruction or declaration: its opcode, up to
+// D3D10_SB_MAX_INSTRUCTION_OPERANDS operands, extended-opcode data, and a
+// union holding the extra fields that particular opcodes need.
+//
+// NOTE(review): the destructor frees pointers stored in the union, yet no
+// copy constructor or assignment operator is declared here, so copying an
+// instruction that owns such a pointer leaves two owners -- confirm callers
+// never copy custom-data / function-table / interface declarations.
+class CInstruction
+{
+protected:
+    static const UINT MAX_PRIVATE_DATA_COUNT = 2;
+
+    // Records one extended opcode type in the next free m_OpCodeEx slot.
+    // When all D3D11_SB_MAX_SIMULTANEOUS_EXTENDED_OPCODES slots are already
+    // used the request is ignored, so repeated Set*() calls can no longer
+    // write past the end of m_OpCodeEx into the members that follow it.
+    void AppendExtendedOpCode(D3D10_SB_EXTENDED_OPCODE_TYPE OpCodeEx)
+    {
+        if (m_ExtendedOpCodeCount < D3D11_SB_MAX_SIMULTANEOUS_EXTENDED_OPCODES)
+        {
+            m_OpCodeEx[m_ExtendedOpCodeCount++] = OpCodeEx;
+        }
+    }
+public:
+    // Clear() runs after the member initializer and zero-fills the whole
+    // object, m_OpCode included.
+    CInstruction():m_OpCode(D3D10_SB_OPCODE_ADD) { Clear(); }
+    // Instruction with the given opcode and no operands.
+    CInstruction(D3D10_SB_OPCODE_TYPE OpCode)
+    {
+        Clear();
+        m_OpCode = OpCode;
+        m_NumOperands = 0;
+        m_ExtendedOpCodeCount = 0;
+    }
+    // One-operand instruction that also carries a boolean test.
+    CInstruction(D3D10_SB_OPCODE_TYPE OpCode, COperandBase& Operand0,
+                 D3D10_SB_INSTRUCTION_TEST_BOOLEAN Test)
+    {
+        Clear();
+        m_OpCode = OpCode;
+        m_NumOperands = 1;
+        m_ExtendedOpCodeCount = 0;
+        m_Test = Test;
+        m_Operands[0] = Operand0;
+    }
+    // Two-operand instruction; operands are copied into m_Operands.
+    CInstruction(D3D10_SB_OPCODE_TYPE OpCode, COperandBase& Operand0, COperandBase& Operand1)
+    {
+        Clear();
+        m_OpCode = OpCode;
+        m_NumOperands = 2;
+        m_ExtendedOpCodeCount = 0;
+        m_Operands[0] = Operand0;
+        m_Operands[1] = Operand1;
+    }
+    // Three-operand instruction; operands are copied into m_Operands.
+    CInstruction(D3D10_SB_OPCODE_TYPE OpCode, COperandBase& Operand0, COperandBase& Operand1, COperandBase& Operand2)
+    {
+        Clear();
+        m_OpCode = OpCode;
+        m_NumOperands = 3;
+        m_ExtendedOpCodeCount = 0;
+        m_Operands[0] = Operand0;
+        m_Operands[1] = Operand1;
+        m_Operands[2] = Operand2;
+    }
+    // Four-operand instruction; operands are copied into m_Operands.
+    CInstruction(D3D10_SB_OPCODE_TYPE OpCode, COperandBase& Operand0, COperandBase& Operand1,
+                 COperandBase& Operand2, COperandBase& Operand3)
+    {
+        Clear();
+        m_OpCode = OpCode;
+        m_NumOperands = 4;
+        m_ExtendedOpCodeCount = 0;
+        m_Operands[0] = Operand0;
+        m_Operands[1] = Operand1;
+        m_Operands[2] = Operand2;
+        m_Operands[3] = Operand3;
+        memset(m_TexelOffset, 0, sizeof(m_TexelOffset));
+    }
+    // Releases the heap blocks owned through the union. Which union member is
+    // live is decided from m_OpCode (and, for custom data, from its Type).
+    void ClearAllocations()
+    {
+        if (m_OpCode == D3D10_SB_OPCODE_CUSTOMDATA)
+        {
+            free(m_CustomData.pData);
+            if (m_CustomData.Type == D3D11_SB_CUSTOMDATA_SHADER_MESSAGE)
+            {
+                free(m_CustomData.ShaderMessage.pOperands);
+            }
+        }
+        else if( m_OpCode == D3D11_SB_OPCODE_DCL_FUNCTION_TABLE )
+        {
+            free(m_FunctionTableDecl.pFunctionIdentifiers);
+        }
+        else if( m_OpCode == D3D11_SB_OPCODE_DCL_INTERFACE )
+        {
+            free(m_InterfaceDecl.pFunctionTableIdentifiers);
+        }
+    }
+    // Zero-fills the entire object (operands and union included). Pass true
+    // when recycling an instruction so owned allocations are released first.
+    void Clear(bool bIncludeCustomData = false)
+    {
+        if( bIncludeCustomData ) // don't need to do this on initial constructor, only if recycling the object.
+        {
+            ClearAllocations();
+        }
+        memset (this, 0, sizeof(*this));
+    }
+    ~CInstruction()
+    {
+        ClearAllocations();
+    }
+    // Index is not range-checked; callers must keep it below
+    // D3D10_SB_MAX_INSTRUCTION_OPERANDS.
+    const COperandBase& Operand(UINT Index) const {return m_Operands[Index];}
+    D3D10_SB_OPCODE_TYPE OpCode() const {return m_OpCode;}
+    void SetNumOperands(UINT NumOperands) {m_NumOperands = NumOperands;}
+    UINT NumOperands() const {return m_NumOperands;}
+    void SetTest(D3D10_SB_INSTRUCTION_TEST_BOOLEAN Test) {m_Test = Test;}
+    void SetPreciseMask(UINT PreciseMask) {m_PreciseMask = PreciseMask;}
+    D3D10_SB_INSTRUCTION_TEST_BOOLEAN Test() const {return m_Test;}
+    // Adds the sample-controls extended opcode (if a slot is free) and copies
+    // the three texel offsets.
+    void SetTexelOffset( const INT8 texelOffset[3] )
+    {
+        AppendExtendedOpCode(D3D10_SB_EXTENDED_OPCODE_SAMPLE_CONTROLS);
+        memcpy(m_TexelOffset, texelOffset,sizeof(m_TexelOffset));
+    }
+    // Same as above, with the offsets given per axis.
+    void SetTexelOffset( INT8 x, INT8 y, INT8 z)
+    {
+        AppendExtendedOpCode(D3D10_SB_EXTENDED_OPCODE_SAMPLE_CONTROLS);
+        m_TexelOffset[0] = x;
+        m_TexelOffset[1] = y;
+        m_TexelOffset[2] = z;
+    }
+    // Adds the resource-dim and resource-return-type extended opcodes (each
+    // only if a slot is free) and stores the dimension, structure stride and
+    // the four per-component return types.
+    void SetResourceDim(D3D10_SB_RESOURCE_DIMENSION Dim,
+                        D3D10_SB_RESOURCE_RETURN_TYPE RetType[4],
+                        UINT StructureStride)
+    {
+        AppendExtendedOpCode(D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM);
+        AppendExtendedOpCode(D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE);
+        m_ResourceDimEx = Dim;
+        m_ResourceDimStructureStrideEx = StructureStride;
+        memcpy(m_ResourceReturnTypeEx, RetType,4*sizeof(D3D10_SB_RESOURCE_RETURN_TYPE));
+    }
+    BOOL Disassemble(__out_ecount(StringSize) LPSTR pString, UINT StringSize);
+
+    // Private data is used by D3D runtime
+    // Out-of-range indices are ignored on write.
+    void SetPrivateData(UINT Value, UINT index = 0)
+    {
+        if (index < MAX_PRIVATE_DATA_COUNT)
+        {
+            m_PrivateData[index] = Value;
+        }
+    }
+    // Returns 0xFFFFFFFF for an out-of-range index.
+    UINT PrivateData(UINT index = 0) const
+    {
+        if (index >= MAX_PRIVATE_DATA_COUNT)
+            return 0xFFFFFFFF;
+        return m_PrivateData[index];
+    }
+    // Get the precise mask
+    UINT GetPreciseMask() const {return m_PreciseMask;}
+
+    D3D10_SB_OPCODE_TYPE           m_OpCode;
+    COperandBase                m_Operands[D3D10_SB_MAX_INSTRUCTION_OPERANDS];
+    UINT                        m_NumOperands;
+    UINT                        m_ExtendedOpCodeCount;
+    UINT                        m_PreciseMask;
+    D3D10_SB_EXTENDED_OPCODE_TYPE  m_OpCodeEx[D3D11_SB_MAX_SIMULTANEOUS_EXTENDED_OPCODES];
+    INT8                        m_TexelOffset[3]; // for extended opcode only
+    D3D10_SB_RESOURCE_DIMENSION m_ResourceDimEx; // for extended opcode only
+    UINT                        m_ResourceDimStructureStrideEx; // for extended opcode only
+    D3D10_SB_RESOURCE_RETURN_TYPE  m_ResourceReturnTypeEx[4]; // for extended opcode only
+    BOOL                        m_bNonuniformResourceIndex; // for extended opcode only
+    BOOL                        m_bNonuniformSamplerIndex;  // for extended opcode only
+    UINT                        m_PrivateData[MAX_PRIVATE_DATA_COUNT];
+    BOOL                        m_bSaturate;
+    union // extra info needed by some instructions
+    {
+        CInputSystemInterpretedValueDecl    m_InputDeclSIV;
+        CInputSystemGeneratedValueDecl      m_InputDeclSGV;
+        CInputPSDecl                        m_InputPSDecl;
+        CInputPSSystemInterpretedValueDecl  m_InputPSDeclSIV;
+        CInputPSSystemGeneratedValueDecl    m_InputPSDeclSGV;
+        COutputSystemInterpretedValueDecl   m_OutputDeclSIV;
+        COutputSystemGeneratedValueDecl     m_OutputDeclSGV;
+        CIndexRangeDecl                     m_IndexRangeDecl;
+        CResourceDecl                       m_ResourceDecl;
+        CConstantBufferDecl                 m_ConstantBufferDecl;
+        CInputPrimitiveDecl                 m_InputPrimitiveDecl;
+        COutputTopologyDecl                 m_OutputTopologyDecl;
+        CGSMaxOutputVertexCountDecl         m_GSMaxOutputVertexCountDecl;
+        CGSInstanceCountDecl                m_GSInstanceCountDecl;
+        CSamplerDecl                        m_SamplerDecl;
+        CStreamDecl                         m_StreamDecl;
+        CTempsDecl                          m_TempsDecl;
+        CIndexableTempDecl                  m_IndexableTempDecl;
+        CGlobalFlagsDecl                    m_GlobalFlagsDecl;
+        CCustomData                         m_CustomData;
+        CInterfaceDecl                      m_InterfaceDecl;
+        CFunctionTableDecl                  m_FunctionTableDecl;
+        CFunctionBodyDecl                   m_FunctionBodyDecl;
+        CInterfaceCall                      m_InterfaceCall;
+        D3D10_SB_INSTRUCTION_TEST_BOOLEAN    m_Test;
+        D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE m_ResInfoReturnType;
+        D3D10_SB_INSTRUCTION_RETURN_TYPE    m_InstructionReturnType;
+        CHSDSInputControlPointCountDecl     m_InputControlPointCountDecl;
+        CHSOutputControlPointCountDecl      m_OutputControlPointCountDecl;
+        CTessellatorDomainDecl              m_TessellatorDomainDecl;
+        CTessellatorPartitioningDecl        m_TessellatorPartitioningDecl;
+        CTessellatorOutputPrimitiveDecl     m_TessellatorOutputPrimitiveDecl;
+        CHSMaxTessFactorDecl                m_HSMaxTessFactorDecl;
+        CHSForkPhaseInstanceCountDecl       m_HSForkPhaseInstanceCountDecl;
+        CHSJoinPhaseInstanceCountDecl       m_HSJoinPhaseInstanceCountDecl;
+        CThreadGroupDeclaration             m_ThreadGroupDecl;
+        CTypedUAVDeclaration                m_TypedUAVDecl;
+        CStructuredUAVDeclaration           m_StructuredUAVDecl;
+        CRawUAVDeclaration                  m_RawUAVDecl;
+        CStructuredTGSMDeclaration          m_StructuredTGSMDecl;
+        CRawSRVDeclaration                  m_RawSRVDecl;
+        CStructuredSRVDeclaration           m_StructuredSRVDecl;
+        CRawTGSMDeclaration                 m_RawTGSMDecl;
+        CSyncFlags                          m_SyncFlags;
+    };
+};
+
+// ****************************************************************************
+//
+// class CShaderAsm
+//
+// The class is used to build a binary representation of a shader.
+// Usage scenario:
+//      1. Call Init with the initial internal buffer size in UINTs. The
+//         internal buffer will grow if needed
+//      2. Call StartShader()
+//      3. Call Emit*() functions to assemble a shader
+//      4. Call EndShader()
+//      5. Call GetShader() to get the binary representation
+//
+//
+// ****************************************************************************
+class CShaderAsm
+{
+public:
+    CShaderAsm():
+        m_dwFunc(NULL),
+        m_Index(0),
+        m_StartOpIndex(0),
+        m_BufferSize(0)
+    {
+        Init(1024);
+    };
+    ~CShaderAsm()
+    {
+        free(m_dwFunc);
+    };
+    // Initializes the object with the initial buffer size in UINTs
+    HRESULT Init(UINT BufferSize)
+    {
+        if( BufferSize >= UINT( -1 ) / sizeof( UINT ) )
+        {
+            return E_OUTOFMEMORY;
+        }
+        m_dwFunc = (UINT*)malloc(BufferSize*sizeof(UINT));
+        if (m_dwFunc == NULL)
+        {
+            return E_OUTOFMEMORY;
+        }
+        m_BufferSize = BufferSize;
+        Reset();
+        return S_OK;
+    }
+    UINT* GetShader()          {return m_dwFunc;}
+    UINT  ShaderSizeInDWORDs() {return m_Index;}
+    UINT  ShaderSizeInBytes() {return ShaderSizeInDWORDs() * sizeof(*m_dwFunc);}
+    UINT  LastInstOffsetInDWORDs() {return m_StartOpIndex;}
+    UINT  LastInstOffsetInBytes() {return LastInstOffsetInDWORDs() * sizeof(*m_dwFunc);}
+
+    // This function should be called to mark the start of a shader
+    void StartShader(D3D10_SB_TOKENIZED_PROGRAM_TYPE ShaderType, UINT vermajor,UINT verminor)
+    {
+        Reset();
+        UINT Token = ENCODE_D3D10_SB_TOKENIZED_PROGRAM_VERSION_TOKEN(ShaderType, vermajor, verminor);
+        OPCODE(Token);
+        OPCODE(0);  // Reserve space for length
+    }
+    // Should be called at the end of the shader
+    void EndShader()
+    {
+        if (1 < m_BufferSize)
+            m_dwFunc[1] = ENCODE_D3D10_SB_TOKENIZED_PROGRAM_LENGTH(m_Index);
+    }
+    // Emit a resource declaration
+    void EmitResourceDecl(D3D10_SB_RESOURCE_DIMENSION Dimension, UINT TRegIndex,
+                          D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForX,
+                          D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForY,
+                          D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForZ,
+                          D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForW)
+    {
+        m_bExecutableInstruction = FALSE;
+        OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_RESOURCE) |
+               ENCODE_D3D10_SB_RESOURCE_DIMENSION(Dimension) );
+        EmitOperand(COperand(D3D10_SB_OPERAND_TYPE_RESOURCE, TRegIndex));
+        FUNC(ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForX, 0) |
+             ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForY, 1) |
+             ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForZ, 2) |
+             ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForW, 3));
+        ENDINSTRUCTION();
+    }
+    // Emit D3D12 resource declaration
+    void EmitIndexableResourceDecl(UINT uTable, UINT uTableLB, UINT uTableUB,
+                                   D3D10_SB_RESOURCE_DIMENSION Dimension,
+                                   D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForX,
+                                   D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForY,
+                                   D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForZ,
+                                   D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForW,
+                                   UINT uSpace)
+    {
+        m_bExecutableInstruction = FALSE;
+        OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_RESOURCE) |
+               ENCODE_D3D10_SB_RESOURCE_DIMENSION(Dimension) );
+        EmitOperand(COperand3D(D3D10_SB_OPERAND_TYPE_RESOURCE, uTable, uTableLB, uTableUB));
+        FUNC(ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForX, 0) |
+             ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForY, 1) |
+             ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForZ, 2) |
+             ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForW, 3));
+        FUNC(uSpace);
+        ENDINSTRUCTION();
+    }
+    // Emit a resource declaration (multisampled)
+    void EmitResourceMSDecl(D3D10_SB_RESOURCE_DIMENSION Dimension, UINT TRegIndex,
+                          D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForX,
+                          D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForY,
+                          D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForZ,
+                          D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForW,
+                          UINT SampleCount)
+    {
+        // Token order: opcode (DCL_RESOURCE | dimension | sample count),
+        // resource operand indexed by TRegIndex, packed return types.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_RESOURCE) |
+            ENCODE_D3D10_SB_RESOURCE_DIMENSION(Dimension) |
+            ENCODE_D3D10_SB_RESOURCE_SAMPLE_COUNT(SampleCount);
+        OPCODE(OpcodeToken);
+
+        COperand ResourceReg(D3D10_SB_OPERAND_TYPE_RESOURCE, TRegIndex);
+        EmitOperand(ResourceReg);
+
+        // Return types for X, Y, Z, W are packed into component slots 0..3.
+        const UINT ReturnTypeToken =
+            ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForX, 0) |
+            ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForY, 1) |
+            ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForZ, 2) |
+            ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForW, 3);
+        FUNC(ReturnTypeToken);
+        ENDINSTRUCTION();
+    }
+    // Emit D3D12 resource declaration (multisampled)
+    void EmitIndexableResourceMSDecl(UINT uTable, UINT uTableLB, UINT uTableUB,
+                                     D3D10_SB_RESOURCE_DIMENSION Dimension,
+                                     D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForX,
+                                     D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForY,
+                                     D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForZ,
+                                     D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForW,
+                                     UINT SampleCount,
+                                     UINT uSpace)
+    {
+        // Token order: opcode (DCL_RESOURCE | dimension | sample count), 3D resource
+        // operand (uTable, uTableLB, uTableUB), packed return types, uSpace.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_RESOURCE) |
+            ENCODE_D3D10_SB_RESOURCE_DIMENSION(Dimension) |
+            ENCODE_D3D10_SB_RESOURCE_SAMPLE_COUNT(SampleCount);
+        OPCODE(OpcodeToken);
+
+        COperand3D ResourceRange(D3D10_SB_OPERAND_TYPE_RESOURCE, uTable, uTableLB, uTableUB);
+        EmitOperand(ResourceRange);
+
+        // Return types for X, Y, Z, W are packed into component slots 0..3.
+        const UINT ReturnTypeToken =
+            ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForX, 0) |
+            ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForY, 1) |
+            ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForZ, 2) |
+            ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForW, 3);
+        FUNC(ReturnTypeToken);
+        FUNC(uSpace);
+        ENDINSTRUCTION();
+    }
+    // Emit a sampler declaration
+    void EmitSamplerDecl(UINT SRegIndex, D3D10_SB_SAMPLER_MODE Mode)
+    {
+        // Token order: opcode (DCL_SAMPLER | sampler mode), sampler operand.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_SAMPLER) |
+            ENCODE_D3D10_SB_SAMPLER_MODE(Mode);
+        OPCODE(OpcodeToken);
+
+        COperand SamplerReg(D3D10_SB_OPERAND_TYPE_SAMPLER, SRegIndex);
+        EmitOperand(SamplerReg);
+        ENDINSTRUCTION();
+    }
+    // Emit D3D12 sampler declaration
+    void EmitIndexableSamplerDecl(UINT uTable, UINT uTableLB, UINT uTableUB, D3D10_SB_SAMPLER_MODE Mode, UINT uSpace)
+    {
+        // Token order: opcode (DCL_SAMPLER | sampler mode), 3D sampler operand
+        // (uTable, uTableLB, uTableUB), uSpace.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_SAMPLER) |
+            ENCODE_D3D10_SB_SAMPLER_MODE(Mode);
+        OPCODE(OpcodeToken);
+
+        COperand3D SamplerRange(D3D10_SB_OPERAND_TYPE_SAMPLER, uTable, uTableLB, uTableUB);
+        EmitOperand(SamplerRange);
+        FUNC(uSpace);
+        ENDINSTRUCTION();
+    }
+
+    // Emit a stream declaration
+    void EmitStreamDecl(UINT SRegIndex)
+    {
+        // Token order: opcode (DCL_STREAM), stream operand indexed by SRegIndex.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_STREAM);
+        OPCODE(OpcodeToken);
+
+        COperand StreamReg(D3D11_SB_OPERAND_TYPE_STREAM, SRegIndex);
+        EmitOperand(StreamReg);
+        ENDINSTRUCTION();
+    }
+
+    // Emit an input declaration
+    void EmitInputDecl(D3D10_SB_OPERAND_TYPE RegType, UINT RegIndex, UINT WriteMask,
+            D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Token order: opcode (DCL_INPUT), destination-style operand for the
+        // declared register (type, index, write mask, min precision).
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT);
+        OPCODE(OpcodeToken);
+
+        COperandDst DeclaredReg(RegType, RegIndex, WriteMask, MinPrecision);
+        EmitOperand(DeclaredReg);
+        ENDINSTRUCTION();
+    }
+    void EmitInputDecl2D(D3D10_SB_OPERAND_TYPE RegType, UINT RegIndex, UINT RegIndex2, UINT WriteMask,
+            D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Same as the 1D input declaration, but the operand carries two indices.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT);
+        OPCODE(OpcodeToken);
+
+        COperandDst DeclaredReg(RegType, RegIndex, RegIndex2, WriteMask, MinPrecision);
+        EmitOperand(DeclaredReg);
+        ENDINSTRUCTION();
+    }
+
+    // Emit an input declaration for a system interpreted value
+    void EmitInputSystemInterpretedValueDecl(D3D10_SB_OPERAND_TYPE RegType, UINT RegIndex, UINT WriteMask, D3D10_SB_NAME Name,
+        D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Token order: opcode (DCL_INPUT_SIV), register operand, encoded system name.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT_SIV);
+        OPCODE(OpcodeToken);
+
+        COperandDst DeclaredReg(RegType, RegIndex, WriteMask, MinPrecision);
+        EmitOperand(DeclaredReg);
+
+        const UINT NameToken = ENCODE_D3D10_SB_NAME(Name);
+        FUNC(NameToken);
+        ENDINSTRUCTION();
+    }
+    void EmitInputSystemInterpretedValueDecl2D(UINT RegIndex, UINT RegIndex2, UINT WriteMask, D3D10_SB_NAME Name,
+        D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Token order: opcode (DCL_INPUT_SIV), two-index input operand, encoded system name.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT_SIV);
+        OPCODE(OpcodeToken);
+
+        COperandDst InputReg(D3D10_SB_OPERAND_TYPE_INPUT, RegIndex, RegIndex2, WriteMask, MinPrecision);
+        EmitOperand(InputReg);
+
+        const UINT NameToken = ENCODE_D3D10_SB_NAME(Name);
+        FUNC(NameToken);
+        ENDINSTRUCTION();
+    }
+    // Emit an input declaration for a system generated value
+    void EmitInputSystemGeneratedValueDecl(UINT RegIndex, UINT WriteMask, D3D10_SB_NAME Name,
+        D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Token order: opcode (DCL_INPUT_SGV), input operand, encoded system name.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT_SGV);
+        OPCODE(OpcodeToken);
+
+        COperandDst InputReg(D3D10_SB_OPERAND_TYPE_INPUT, RegIndex, WriteMask, MinPrecision);
+        EmitOperand(InputReg);
+
+        const UINT NameToken = ENCODE_D3D10_SB_NAME(Name);
+        FUNC(NameToken);
+        ENDINSTRUCTION();
+    }
+    void EmitInputSystemGeneratedValueDecl2D(UINT RegIndex, UINT RegIndex2, UINT WriteMask, D3D10_SB_NAME Name,
+        D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Token order: opcode (DCL_INPUT_SGV), two-index input operand, encoded system name.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT_SGV);
+        OPCODE(OpcodeToken);
+
+        COperandDst InputReg(D3D10_SB_OPERAND_TYPE_INPUT, RegIndex, RegIndex2, WriteMask, MinPrecision);
+        EmitOperand(InputReg);
+
+        const UINT NameToken = ENCODE_D3D10_SB_NAME(Name);
+        FUNC(NameToken);
+        ENDINSTRUCTION();
+    }
+    // Emit a PS input declaration
+    void EmitPSInputDecl(UINT RegIndex, UINT WriteMask, D3D10_SB_INTERPOLATION_MODE Mode,
+        D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Token order: opcode (DCL_INPUT_PS | interpolation mode), input operand.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT_PS) |
+            ENCODE_D3D10_SB_INPUT_INTERPOLATION_MODE(Mode);
+        OPCODE(OpcodeToken);
+
+        COperandDst InputReg(D3D10_SB_OPERAND_TYPE_INPUT, RegIndex, WriteMask, MinPrecision);
+        EmitOperand(InputReg);
+        ENDINSTRUCTION();
+    }
+    // Emit a PS input declaration for a system interpreted value
+    void EmitPSInputSystemInterpretedValueDecl(UINT RegIndex, UINT WriteMask, D3D10_SB_INTERPOLATION_MODE Mode, D3D10_SB_NAME Name,
+        D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Token order: opcode (DCL_INPUT_PS_SIV | interpolation mode), input operand,
+        // encoded system name.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT_PS_SIV) |
+            ENCODE_D3D10_SB_INPUT_INTERPOLATION_MODE(Mode);
+        OPCODE(OpcodeToken);
+
+        COperandDst InputReg(D3D10_SB_OPERAND_TYPE_INPUT, RegIndex, WriteMask, MinPrecision);
+        EmitOperand(InputReg);
+
+        const UINT NameToken = ENCODE_D3D10_SB_NAME(Name);
+        FUNC(NameToken);
+        ENDINSTRUCTION();
+    }
+    // Emit a PS input declaration for a system generated value
+    void EmitPSInputSystemGeneratedValueDecl(UINT RegIndex, UINT WriteMask, D3D10_SB_INTERPOLATION_MODE Mode, D3D10_SB_NAME Name,
+        D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Token order: opcode (DCL_INPUT_PS_SGV | interpolation mode), input operand,
+        // encoded system name.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT_PS_SGV) |
+            ENCODE_D3D10_SB_INPUT_INTERPOLATION_MODE(Mode);
+        OPCODE(OpcodeToken);
+
+        COperandDst InputReg(D3D10_SB_OPERAND_TYPE_INPUT, RegIndex, WriteMask, MinPrecision);
+        EmitOperand(InputReg);
+
+        const UINT NameToken = ENCODE_D3D10_SB_NAME(Name);
+        FUNC(NameToken);
+        ENDINSTRUCTION();
+    }
+    // Emit input coverage mask declaration
+    void EmitInputCoverageMaskDecl(D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Token order: opcode (DCL_INPUT), INPUT_COVERAGE_MASK operand.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT);
+        OPCODE(OpcodeToken);
+
+        COperand CoverageMask(D3D11_SB_OPERAND_TYPE_INPUT_COVERAGE_MASK, MinPrecision);
+        EmitOperand(CoverageMask);
+        ENDINSTRUCTION();
+    }
+    // Emit inner coverage declaration
+    void EmitInnerCoverageDecl(D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Token order: opcode (DCL_INPUT), INNER_COVERAGE operand.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT);
+        OPCODE(OpcodeToken);
+
+        COperand InnerCoverage(D3D11_SB_OPERAND_TYPE_INNER_COVERAGE, MinPrecision);
+        EmitOperand(InnerCoverage);
+        ENDINSTRUCTION();
+    }
+    // Emit cycle counter decl
+    void EmitCycleCounterDecl(UINT WriteMask)
+    {
+        // Token order: opcode (DCL_INPUT), CYCLE_COUNTER operand with the given write mask.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT);
+        OPCODE(OpcodeToken);
+
+        COperandDst CycleCounter(WriteMask, D3D11_SB_OPERAND_TYPE_CYCLE_COUNTER);
+        EmitOperand(CycleCounter);
+        ENDINSTRUCTION();
+    }
+
+    // Emit input primitive id declaration
+    void EmitInputPrimIdDecl(D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Token order: opcode (DCL_INPUT), INPUT_PRIMITIVEID operand.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT);
+        OPCODE(OpcodeToken);
+
+        COperandDst PrimitiveId(D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID, MinPrecision);
+        EmitOperand(PrimitiveId);
+        ENDINSTRUCTION();
+    }
+    // Emit input domain point declaration
+    void EmitInputDomainPointDecl(UINT WriteMask,
+        D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Token order: opcode (DCL_INPUT), INPUT_DOMAIN_POINT operand with the given write mask.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT);
+        OPCODE(OpcodeToken);
+
+        COperandDst DomainPoint(WriteMask, D3D11_SB_OPERAND_TYPE_INPUT_DOMAIN_POINT, MinPrecision);
+        EmitOperand(DomainPoint);
+        ENDINSTRUCTION();
+    }
+    // Emit an oDepth declaration
+    void EmitODepthDecl(D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Token order: opcode (DCL_OUTPUT), OUTPUT_DEPTH operand.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_OUTPUT);
+        OPCODE(OpcodeToken);
+
+        COperandDst DepthOut(D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH, MinPrecision);
+        EmitOperand(DepthOut);
+        ENDINSTRUCTION();
+    }
+    // Emit an oDepthGE declaration
+    void EmitODepthDeclGE(D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Token order: opcode (DCL_OUTPUT), OUTPUT_DEPTH_GREATER_EQUAL operand.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_OUTPUT);
+        OPCODE(OpcodeToken);
+
+        COperandDst DepthOut(D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL, MinPrecision);
+        EmitOperand(DepthOut);
+        ENDINSTRUCTION();
+    }
+    // Emit an oDepthLE declaration
+    void EmitODepthDeclLE(D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Token order: opcode (DCL_OUTPUT), OUTPUT_DEPTH_LESS_EQUAL operand.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_OUTPUT);
+        OPCODE(OpcodeToken);
+
+        COperandDst DepthOut(D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL, MinPrecision);
+        EmitOperand(DepthOut);
+        ENDINSTRUCTION();
+    }
+    // Emit an oMask declaration
+    void EmitOMaskDecl(D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Token order: opcode (DCL_OUTPUT), OUTPUT_COVERAGE_MASK operand.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_OUTPUT);
+        OPCODE(OpcodeToken);
+
+        COperandDst CoverageOut(D3D10_SB_OPERAND_TYPE_OUTPUT_COVERAGE_MASK, MinPrecision);
+        EmitOperand(CoverageOut);
+        ENDINSTRUCTION();
+    }
+    // Emit an oStencilRef declaration
+    void EmitOStencilRefDecl(D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Token order: opcode (DCL_OUTPUT), OUTPUT_STENCIL_REF operand.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_OUTPUT);
+        OPCODE(OpcodeToken);
+
+        COperandDst StencilRefOut(D3D11_SB_OPERAND_TYPE_OUTPUT_STENCIL_REF, MinPrecision);
+        EmitOperand(StencilRefOut);
+        ENDINSTRUCTION();
+    }
+    // Emit an output declaration
+    void EmitOutputDecl(UINT RegIndex, UINT WriteMask,
+        D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Token order: opcode (DCL_OUTPUT), output register operand.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_OUTPUT);
+        OPCODE(OpcodeToken);
+
+        COperandDst OutputReg(D3D10_SB_OPERAND_TYPE_OUTPUT, RegIndex, WriteMask, MinPrecision);
+        EmitOperand(OutputReg);
+        ENDINSTRUCTION();
+    }
+    // Emit an output declaration for a system interpreted value
+    void EmitOutputSystemInterpretedValueDecl(UINT RegIndex, UINT WriteMask, D3D10_SB_NAME Name,
+        D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Token order: opcode (DCL_OUTPUT_SIV), output operand, encoded system name.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_OUTPUT_SIV);
+        OPCODE(OpcodeToken);
+
+        COperandDst OutputReg(D3D10_SB_OPERAND_TYPE_OUTPUT, RegIndex, WriteMask, MinPrecision);
+        EmitOperand(OutputReg);
+
+        const UINT NameToken = ENCODE_D3D10_SB_NAME(Name);
+        FUNC(NameToken);
+        ENDINSTRUCTION();
+    }
+    // Emit an output declaration for a system generated value
+    void EmitOutputSystemGeneratedValueDecl(UINT RegIndex, UINT WriteMask, D3D10_SB_NAME Name,
+        D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Token order: opcode (DCL_OUTPUT_SGV), output operand, encoded system name.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_OUTPUT_SGV);
+        OPCODE(OpcodeToken);
+
+        COperandDst OutputReg(D3D10_SB_OPERAND_TYPE_OUTPUT, RegIndex, WriteMask, MinPrecision);
+        EmitOperand(OutputReg);
+
+        const UINT NameToken = ENCODE_D3D10_SB_NAME(Name);
+        FUNC(NameToken);
+        ENDINSTRUCTION();
+    }
+
+    // Emit an input register indexing range declaration
+    void EmitInputIndexingRangeDecl(UINT RegIndex, UINT Count, UINT WriteMask)
+    {
+        // Token order: opcode (DCL_INDEX_RANGE), input operand at RegIndex, register count.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INDEX_RANGE);
+        OPCODE(OpcodeToken);
+
+        COperandDst RangeStart(D3D10_SB_OPERAND_TYPE_INPUT, RegIndex, WriteMask);
+        EmitOperand(RangeStart);
+        FUNC(Count);
+        ENDINSTRUCTION();
+    }
+
+    // 2D indexing range decl (indexing is for second dimension)
+    void EmitInputIndexingRangeDecl2D(UINT RegIndex, UINT RegIndex2Min, UINT Reg2Count, UINT WriteMask)
+    {
+        // Token order: opcode (DCL_INDEX_RANGE), two-index input operand
+        // (RegIndex, RegIndex2Min), count for the second dimension.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INDEX_RANGE);
+        OPCODE(OpcodeToken);
+
+        COperandDst RangeStart(D3D10_SB_OPERAND_TYPE_INPUT, RegIndex, RegIndex2Min, WriteMask);
+        EmitOperand(RangeStart);
+        FUNC(Reg2Count);
+        ENDINSTRUCTION();
+    }
+
+    // Emit an output register indexing range declaration
+    void EmitOutputIndexingRangeDecl(UINT RegIndex, UINT Count, UINT WriteMask)
+    {
+        // Token order: opcode (DCL_INDEX_RANGE), output operand at RegIndex, register count.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INDEX_RANGE);
+        OPCODE(OpcodeToken);
+
+        COperandDst RangeStart(D3D10_SB_OPERAND_TYPE_OUTPUT, RegIndex, WriteMask);
+        EmitOperand(RangeStart);
+        FUNC(Count);
+        ENDINSTRUCTION();
+    }
+
+    // Emit indexing range decl taking reg type as parameter 
+    // (for things other than plain input or output regs)
+    void EmitIndexingRangeDecl(D3D10_SB_OPERAND_TYPE RegType, UINT RegIndex, UINT Count, UINT WriteMask)
+    {
+        // Token order: opcode (DCL_INDEX_RANGE), operand of the caller-supplied
+        // register type at RegIndex, register count.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INDEX_RANGE);
+        OPCODE(OpcodeToken);
+
+        COperandDst RangeStart(RegType, RegIndex, WriteMask);
+        EmitOperand(RangeStart);
+        FUNC(Count);
+        ENDINSTRUCTION();
+    }
+
+    // 2D indexing range decl (indexing is for second dimension)
+    // Emit indexing range decl taking reg type as parameter 
+    // (for things other than plain input or output regs)
+    void EmitIndexingRangeDecl2D(D3D10_SB_OPERAND_TYPE RegType, UINT RegIndex, UINT RegIndex2Min, UINT Reg2Count, UINT WriteMask)
+    {
+        // Token order: opcode (DCL_INDEX_RANGE), two-index operand of the
+        // caller-supplied register type, count for the second dimension.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INDEX_RANGE);
+        OPCODE(OpcodeToken);
+
+        COperandDst RangeStart(RegType, RegIndex, RegIndex2Min, WriteMask);
+        EmitOperand(RangeStart);
+        FUNC(Reg2Count);
+        ENDINSTRUCTION();
+    }
+
+
+    // Emit a temp registers ( r0...r(n-1) ) declaration
+    void EmitTempsDecl(UINT NumTemps)
+    {
+        // Token order: opcode (DCL_TEMPS), number of temp registers.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_TEMPS);
+        OPCODE(OpcodeToken);
+        FUNC(NumTemps);
+        ENDINSTRUCTION();
+    }
+
+    // Emit an indexable temp register (x#) declaration
+    void EmitIndexableTempDecl(UINT TempNumber, UINT RegCount, UINT ComponentCount )
+    {
+        // Token order: opcode (DCL_INDEXABLE_TEMP), temp number, register count,
+        // component count.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP);
+        OPCODE(OpcodeToken);
+        FUNC(TempNumber);
+        FUNC(RegCount);
+        FUNC(ComponentCount);
+        ENDINSTRUCTION();
+    }
+
+    // Emit a constant buffer (cb#) declaration
+    void EmitConstantBufferDecl(UINT RegIndex, UINT Size, // size 0 means unknown/any size
+                                D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN AccessPattern)
+    {
+        // Token order: opcode (DCL_CONSTANT_BUFFER | access pattern), 2D constant
+        // buffer operand whose indices are (RegIndex, Size).
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER) |
+            ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(AccessPattern);
+        OPCODE(OpcodeToken);
+
+        COperand2D BufferOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, RegIndex, Size);
+        EmitOperand(BufferOperand);
+        ENDINSTRUCTION();
+    }
+
+    // Emit D3D12 constant buffer (cb#) declaration.
+    void EmitIndexableConstantBufferDecl(UINT uCBufferVarIndex, UINT uLB, UINT uUB,
+                                         UINT Size, // size 0 means unknown/any size
+                                         D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN AccessPattern,
+                                         UINT uSpace)
+    {
+        // Token order: opcode (DCL_CONSTANT_BUFFER | access pattern), 3D constant
+        // buffer operand (uCBufferVarIndex, uLB, uUB), Size, uSpace.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER) |
+            ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(AccessPattern);
+        OPCODE(OpcodeToken);
+
+        COperand3D BufferRange(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, uCBufferVarIndex, uLB, uUB);
+        EmitOperand(BufferRange);
+        FUNC(Size);
+        FUNC(uSpace);
+        ENDINSTRUCTION();
+    }
+
+    // Emit Immediate Constant Buffer (icb) declaration
+    void EmitImmediateConstantBufferDecl(UINT Num4Tuples, const UINT* pImmediateConstantBufferData)
+    {
+        // Forwards the immediate constant buffer to EmitCustomData as a
+        // DCL_IMMEDIATE_CONSTANT_BUFFER custom-data block.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        // Four UINTs per tuple; 2 UINTS will be added during encoding.
+        const UINT PayloadSizeInUINTs = 4*Num4Tuples;
+        EmitCustomData( D3D10_SB_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER,
+                        PayloadSizeInUINTs,
+                        pImmediateConstantBufferData);
+    }
+
+    // Emit a GS input primitive declaration
+    void EmitGSInputPrimitiveDecl(D3D10_SB_PRIMITIVE Primitive)
+    {
+        // Single-token declaration: the primitive is encoded in the opcode token.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE) |
+            ENCODE_D3D10_SB_GS_INPUT_PRIMITIVE(Primitive);
+        OPCODE(OpcodeToken);
+        ENDINSTRUCTION();
+    }
+
+    // Emit a GS output topology declaration
+    void EmitGSOutputTopologyDecl(D3D10_SB_PRIMITIVE_TOPOLOGY Topology)
+    {
+        // Single-token declaration: the topology is encoded in the opcode token.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY) |
+            ENCODE_D3D10_SB_GS_OUTPUT_PRIMITIVE_TOPOLOGY(Topology);
+        OPCODE(OpcodeToken);
+        ENDINSTRUCTION();
+    }
+
+    // Emit GS Maximum Output Vertex Count declaration
+    void EmitGSMaxOutputVertexCountDecl(UINT Count)
+    {
+        // Token order: opcode (DCL_MAX_OUTPUT_VERTEX_COUNT), vertex count.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT);
+        OPCODE(OpcodeToken);
+        FUNC(Count);
+        ENDINSTRUCTION();
+    }
+    // Emit input GS instance count declaration
+    void EmitInputGSInstanceCountDecl( UINT Instances )
+    {
+        // Token order: opcode (DCL_GS_INSTANCE_COUNT), instance count.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_GS_INSTANCE_COUNT);
+        OPCODE(OpcodeToken);
+        FUNC(Instances);
+        ENDINSTRUCTION();
+    }
+    // Emit input GS instance ID declaration
+    void EmitInputGSInstanceIDDecl(D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Token order: opcode (DCL_INPUT), INPUT_GS_INSTANCE_ID operand.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT);
+        OPCODE(OpcodeToken);
+
+        COperandDst InstanceId(D3D11_SB_OPERAND_TYPE_INPUT_GS_INSTANCE_ID, MinPrecision);
+        EmitOperand(InstanceId);
+        ENDINSTRUCTION();
+    }
+
+    // Emit global flags declaration
+    void EmitGlobalFlagsDecl(UINT Flags)
+    {
+        // Single-token declaration: the flags are encoded in the opcode token.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS) |
+            ENCODE_D3D10_SB_GLOBAL_FLAGS(Flags);
+        OPCODE(OpcodeToken);
+        ENDINSTRUCTION();
+    }
+    // Emit interface function body declaration
+    void EmitFunctionBodyDecl(UINT uFunctionID)
+    {
+        // Token order: opcode (DCL_FUNCTION_BODY), function ID.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_FUNCTION_BODY);
+        OPCODE(OpcodeToken);
+        FUNC(uFunctionID);
+        ENDINSTRUCTION();
+    }
+
+    void EmitFunctionTableDecl(UINT uFunctionTableID, UINT uTableSize, UINT *pTableEntries)
+    {
+        // Emits DCL_FUNCTION_TABLE: opcode, optional extra token for the extended
+        // form, table ID, entry count, then uTableSize entries copied verbatim
+        // from pTableEntries.
+        m_bExecutableInstruction = FALSE;
+        // 3 = opcode token + table ID token + table size token.
+        bool bExtended = (3 + uTableSize) > MAX_INSTRUCTION_LENGTH;
+        OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_FUNCTION_TABLE) |
+               ENCODE_D3D10_SB_OPCODE_EXTENDED(bExtended));
+
+        // Extended form reserves one token right after the opcode
+        // (presumably filled in by ENDLONGINSTRUCTION - TODO confirm).
+        if( bExtended )
+            FUNC(0);
+
+        FUNC(uFunctionTableID);
+        FUNC(uTableSize);
+
+        if( m_Index + uTableSize >= m_BufferSize )
+        {
+            Reserve(uTableSize);
+        }
+
+        // memcpy requires valid pointers even for a zero-byte copy, so an empty
+        // table (where pTableEntries may be null) skips the copy entirely.
+        if( uTableSize != 0 )
+        {
+            memcpy(&m_dwFunc[m_Index],pTableEntries,sizeof(UINT)*uTableSize);
+            m_Index += uTableSize;
+        }
+
+        ENDLONGINSTRUCTION(bExtended);
+    }
+
+    void EmitInterfaceDecl(UINT uInterfaceID,
+                           bool bDynamicIndexed,
+                           UINT uArrayLength,
+                           UINT uExpectedTableSize,
+                           __in_range(0, D3D11_SB_MAX_NUM_TYPES) UINT uNumTypes,
+                           __in_ecount(uNumTypes) UINT *pTableEntries)
+    {
+        // Emits DCL_INTERFACE: opcode (with the dynamic-indexed bit), optional extra
+        // token for the extended form, interface ID, expected table size, packed
+        // table/array lengths, then uNumTypes entries copied from pTableEntries.
+        m_bExecutableInstruction = FALSE;
+        // 4 = opcode token + interface ID + expected table size + packed lengths.
+        bool bExtended = (4 + uNumTypes) > MAX_INSTRUCTION_LENGTH;
+        OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_INTERFACE) |
+               ENCODE_D3D11_SB_INTERFACE_INDEXED_BIT(bDynamicIndexed) |
+               ENCODE_D3D10_SB_OPCODE_EXTENDED(bExtended));
+
+        // Extended form reserves one token right after the opcode
+        // (presumably filled in by ENDLONGINSTRUCTION - TODO confirm).
+        if( bExtended )
+            FUNC(0);
+
+        FUNC(uInterfaceID);
+        FUNC(uExpectedTableSize);
+        FUNC(ENCODE_D3D11_SB_INTERFACE_TABLE_LENGTH(uNumTypes) |
+             ENCODE_D3D11_SB_INTERFACE_ARRAY_LENGTH(uArrayLength));
+
+        if( m_Index + uNumTypes >= m_BufferSize )
+        {
+            Reserve(uNumTypes);
+        }
+
+        // uNumTypes is annotated as allowing 0; memcpy requires valid pointers even
+        // for a zero-byte copy, so skip it when there are no entries.
+        if( uNumTypes != 0 )
+        {
+            memcpy(&m_dwFunc[m_Index],pTableEntries,sizeof(UINT)*uNumTypes);
+            m_Index += uNumTypes;
+        }
+
+        ENDLONGINSTRUCTION(bExtended);
+    }
+    void EmitInterfaceCall(COperandBase &InterfaceOperand,
+                           UINT uFunctionIndex)
+    {
+        // Token order: opcode (INTERFACE_CALL), function index, interface operand.
+        m_bExecutableInstruction = TRUE; // unlike the declarations, this is flagged executable
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_INTERFACE_CALL);
+        OPCODE(OpcodeToken);
+        FUNC(uFunctionIndex);
+        EmitOperand(InterfaceOperand);
+        ENDINSTRUCTION();
+    }
+
+    void EmitInputControlPointCountDecl(UINT Count)
+    {
+        // Single-token declaration: the count is encoded in the opcode token.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT) |
+            ENCODE_D3D11_SB_INPUT_CONTROL_POINT_COUNT(Count);
+        OPCODE(OpcodeToken);
+        ENDINSTRUCTION();
+    }
+
+    void EmitOutputControlPointCountDecl(UINT Count)
+    {
+        // Single-token declaration: the count is encoded in the opcode token.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT) |
+            ENCODE_D3D11_SB_OUTPUT_CONTROL_POINT_COUNT(Count);
+        OPCODE(OpcodeToken);
+        ENDINSTRUCTION();
+    }
+
+    void EmitTessellatorDomainDecl(D3D11_SB_TESSELLATOR_DOMAIN Domain)
+    {
+        // Single-token declaration: the domain is encoded in the opcode token.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_TESS_DOMAIN) |
+            ENCODE_D3D11_SB_TESS_DOMAIN(Domain);
+        OPCODE(OpcodeToken);
+        ENDINSTRUCTION();
+    }
+
+    void EmitTessellatorPartitioningDecl(D3D11_SB_TESSELLATOR_PARTITIONING Partitioning)
+    {
+        // Single-token declaration: the partitioning is encoded in the opcode token.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_TESS_PARTITIONING) |
+            ENCODE_D3D11_SB_TESS_PARTITIONING(Partitioning);
+        OPCODE(OpcodeToken);
+        ENDINSTRUCTION();
+    }
+
+    void EmitTessellatorOutputPrimitiveDecl(D3D11_SB_TESSELLATOR_OUTPUT_PRIMITIVE OutputPrimitive)
+    {
+        // Single-token declaration: the output primitive is encoded in the opcode token.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE) |
+            ENCODE_D3D11_SB_TESS_OUTPUT_PRIMITIVE(OutputPrimitive);
+        OPCODE(OpcodeToken);
+        ENDINSTRUCTION();
+    }
+
+    void EmitHSMaxTessFactorDecl(float MaxTessFactor)
+    {
+        // Token order: opcode (DCL_HS_MAX_TESSFACTOR), then the raw bit pattern of
+        // MaxTessFactor as one token.
+        m_bExecutableInstruction = FALSE;
+        OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_HS_MAX_TESSFACTOR));
+        // Copy the float's bytes with memcpy instead of reading it through a
+        // casted UINT pointer, which violates the strict-aliasing rule.
+        // Like the original cast, this relies on float and UINT having the same size.
+        UINT uTemp = 0;
+        memcpy(&uTemp, &MaxTessFactor, sizeof(uTemp));
+        FUNC(uTemp);
+        ENDINSTRUCTION();
+    }
+
+    void EmitHSForkPhaseInstanceCountDecl(UINT InstanceCount)
+    {
+        // Token order: opcode (DCL_HS_FORK_PHASE_INSTANCE_COUNT), instance count.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT);
+        OPCODE(OpcodeToken);
+        FUNC(InstanceCount);
+        ENDINSTRUCTION();
+    }
+
+    void EmitHSJoinPhaseInstanceCountDecl(UINT InstanceCount)
+    {
+        // Token order: opcode (DCL_HS_JOIN_PHASE_INSTANCE_COUNT), instance count.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT);
+        OPCODE(OpcodeToken);
+        FUNC(InstanceCount);
+        ENDINSTRUCTION();
+    }
+
+    void EmitHSBeginPhase(D3D10_SB_OPCODE_TYPE Phase)
+    {
+        // Emits a single opcode token whose opcode type is the caller-supplied Phase.
+        m_bExecutableInstruction = FALSE; // not flagged as an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(Phase);
+        OPCODE(OpcodeToken);
+        ENDINSTRUCTION();
+    }
+
+    void EmitInputOutputControlPointIDDecl(D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Token order: opcode (DCL_INPUT), OUTPUT_CONTROL_POINT_ID operand.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT);
+        OPCODE(OpcodeToken);
+
+        COperandDst ControlPointId(D3D11_SB_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID, MinPrecision);
+        EmitOperand(ControlPointId);
+        ENDINSTRUCTION();
+    }
+
+    void EmitInputForkInstanceIDDecl(D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Token order: opcode (DCL_INPUT), INPUT_FORK_INSTANCE_ID operand.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT);
+        OPCODE(OpcodeToken);
+
+        COperandDst ForkInstanceId(D3D11_SB_OPERAND_TYPE_INPUT_FORK_INSTANCE_ID, MinPrecision);
+        EmitOperand(ForkInstanceId);
+        ENDINSTRUCTION();
+    }
+
+    void EmitInputJoinInstanceIDDecl(D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Token order: opcode (DCL_INPUT), INPUT_JOIN_INSTANCE_ID operand.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT);
+        OPCODE(OpcodeToken);
+
+        COperandDst JoinInstanceId(D3D11_SB_OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID, MinPrecision);
+        EmitOperand(JoinInstanceId);
+        ENDINSTRUCTION();
+    }
+
+    void EmitThreadGroupDecl(UINT x, UINT y, UINT z)
+    {
+        // Token order: opcode (DCL_THREAD_GROUP), then x, y, z as one token each.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_THREAD_GROUP);
+        OPCODE(OpcodeToken);
+        FUNC(x);
+        FUNC(y);
+        FUNC(z);
+        ENDINSTRUCTION();
+    }
+
+    void EmitInputThreadIDDecl(UINT WriteMask,
+        D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Token order: opcode (DCL_INPUT), INPUT_THREAD_ID operand with the given write mask.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT);
+        OPCODE(OpcodeToken);
+
+        COperandDst ThreadId(WriteMask, D3D11_SB_OPERAND_TYPE_INPUT_THREAD_ID, MinPrecision);
+        EmitOperand(ThreadId);
+        ENDINSTRUCTION();
+    }
+
+    void EmitInputThreadGroupIDDecl(UINT WriteMask,
+        D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Token order: opcode (DCL_INPUT), INPUT_THREAD_GROUP_ID operand with the given write mask.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT);
+        OPCODE(OpcodeToken);
+
+        COperandDst ThreadGroupId(WriteMask, D3D11_SB_OPERAND_TYPE_INPUT_THREAD_GROUP_ID, MinPrecision);
+        EmitOperand(ThreadGroupId);
+        ENDINSTRUCTION();
+    }
+
+    void EmitInputThreadIDInGroupDecl(UINT WriteMask,
+        D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Token order: opcode (DCL_INPUT), INPUT_THREAD_ID_IN_GROUP operand with the given write mask.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT);
+        OPCODE(OpcodeToken);
+
+        COperandDst ThreadIdInGroup(WriteMask, D3D11_SB_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP, MinPrecision);
+        EmitOperand(ThreadIdInGroup);
+        ENDINSTRUCTION();
+    }
+
+    void EmitInputThreadIDInGroupFlattenedDecl(D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT)
+    {
+        // Token order: opcode (DCL_INPUT), INPUT_THREAD_ID_IN_GROUP_FLATTENED operand.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT);
+        OPCODE(OpcodeToken);
+
+        COperandDst FlattenedThreadId(D3D11_SB_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED, MinPrecision);
+        EmitOperand(FlattenedThreadId);
+        ENDINSTRUCTION();
+    }
+
+    void EmitTypedUnorderedAccessViewDecl(D3D10_SB_RESOURCE_DIMENSION Dimension, UINT URegIndex,
+                          D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForX,
+                          D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForY,
+                          D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForZ,
+                          D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForW,
+                          UINT Flags)
+    {
+        // Token order: opcode (DCL_UNORDERED_ACCESS_VIEW_TYPED | dimension | flags),
+        // UAV operand indexed by URegIndex, packed return types.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED) |
+            ENCODE_D3D10_SB_RESOURCE_DIMENSION(Dimension) |
+            ENCODE_D3D11_SB_RESOURCE_FLAGS(Flags);
+        OPCODE(OpcodeToken);
+
+        COperand UavReg(D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, URegIndex);
+        EmitOperand(UavReg);
+
+        // Return types for X, Y, Z, W are packed into component slots 0..3.
+        const UINT ReturnTypeToken =
+            ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForX, 0) |
+            ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForY, 1) |
+            ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForZ, 2) |
+            ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForW, 3);
+        FUNC(ReturnTypeToken);
+        ENDINSTRUCTION();
+    }
+    // Emit D3D12 UAV declaration.
+    void EmitIndexableTypedUnorderedAccessViewDecl(UINT uTable, UINT uTableLB, UINT uTableUB,
+                                                   D3D10_SB_RESOURCE_DIMENSION Dimension,
+                                                   D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForX,
+                                                   D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForY,
+                                                   D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForZ,
+                                                   D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForW,
+                                                   UINT Flags,
+                                                   UINT uSpace)
+    {
+        // Token order: opcode (DCL_UNORDERED_ACCESS_VIEW_TYPED | dimension | flags),
+        // 3D UAV operand (uTable, uTableLB, uTableUB), packed return types, uSpace.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED) |
+            ENCODE_D3D10_SB_RESOURCE_DIMENSION(Dimension) |
+            ENCODE_D3D11_SB_RESOURCE_FLAGS(Flags);
+        OPCODE(OpcodeToken);
+
+        COperand3D UavRange(D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, uTable, uTableLB, uTableUB);
+        EmitOperand(UavRange);
+
+        // Return types for X, Y, Z, W are packed into component slots 0..3.
+        const UINT ReturnTypeToken =
+            ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForX, 0) |
+            ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForY, 1) |
+            ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForZ, 2) |
+            ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForW, 3);
+        FUNC(ReturnTypeToken);
+        FUNC(uSpace);
+        ENDINSTRUCTION();
+    }
+
+    void EmitRawUnorderedAccessViewDecl(UINT URegIndex, UINT Flags)
+    {
+        // Token order: opcode (DCL_UNORDERED_ACCESS_VIEW_RAW | flags), UAV operand.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW) |
+            ENCODE_D3D11_SB_RESOURCE_FLAGS(Flags);
+        OPCODE(OpcodeToken);
+
+        COperand UavReg(D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, URegIndex);
+        EmitOperand(UavReg);
+        ENDINSTRUCTION();
+    }
+    // Emit D3D12 raw UAV declaration.
+    void EmitIndexableRawUnorderedAccessViewDecl(UINT uTable, UINT uTableLB, UINT uTableUB, UINT Flags, UINT uSpace)
+    {
+        // Token order: opcode (DCL_UNORDERED_ACCESS_VIEW_RAW | flags), 3D UAV operand
+        // (uTable, uTableLB, uTableUB), uSpace.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW) |
+            ENCODE_D3D11_SB_RESOURCE_FLAGS(Flags);
+        OPCODE(OpcodeToken);
+
+        COperand3D UavRange(D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, uTable, uTableLB, uTableUB);
+        EmitOperand(UavRange);
+        FUNC(uSpace);
+        ENDINSTRUCTION();
+    }
+
+    void EmitStructuredUnorderedAccessViewDecl(UINT URegIndex, UINT ByteStride, UINT Flags )
+    {
+        // Token order: opcode (DCL_UNORDERED_ACCESS_VIEW_STRUCTURED | flags),
+        // UAV operand, byte stride.
+        m_bExecutableInstruction = FALSE; // declaration, not an executable instruction
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED) |
+            ENCODE_D3D11_SB_RESOURCE_FLAGS(Flags);
+        OPCODE(OpcodeToken);
+
+        COperand UavReg(D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, URegIndex);
+        EmitOperand(UavReg);
+        FUNC(ByteStride);
+        ENDINSTRUCTION();
+    }
+    // Emit D3D12 structured UAV declaration.
+    // Token order after the opcode: the UAV operand built from
+    // (uTable, uTableLB, uTableUB), then ByteStride, then uSpace.
+    void EmitIndexableStructuredUnorderedAccessViewDecl(UINT uTable, UINT uTableLB, UINT uTableUB,
+                                                        UINT ByteStride, UINT Flags, UINT uSpace )
+    {
+        // Keep this declaration out of the executable-instruction count.
+        m_bExecutableInstruction = FALSE;
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED) |
+            ENCODE_D3D11_SB_RESOURCE_FLAGS(Flags);
+        OPCODE(OpcodeToken);
+
+        const COperand3D UavRange(D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, uTable, uTableLB, uTableUB);
+        EmitOperand(UavRange);
+
+        FUNC(ByteStride);
+        FUNC(uSpace);
+        ENDINSTRUCTION();
+    }
+
+    // Emits a raw thread-group-shared-memory declaration for register
+    // GRegIndex; ByteCount follows the operand as its own token.
+    void EmitRawThreadGroupSharedMemoryDecl(UINT GRegIndex, UINT ByteCount )
+    {
+        // Keep this declaration out of the executable-instruction count.
+        m_bExecutableInstruction = FALSE;
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW);
+        OPCODE(OpcodeToken);
+
+        const COperand TgsmOperand(D3D11_SB_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY, GRegIndex);
+        EmitOperand(TgsmOperand);
+
+        FUNC(ByteCount);
+        ENDINSTRUCTION();
+    }
+
+    // Emits a structured thread-group-shared-memory declaration for register
+    // GRegIndex. ByteStride and StructCount follow the operand, in that order.
+    void EmitStructuredThreadGroupSharedMemoryDecl(UINT GRegIndex, UINT ByteStride, UINT StructCount )
+    {
+        // Keep this declaration out of the executable-instruction count.
+        m_bExecutableInstruction = FALSE;
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED);
+        OPCODE(OpcodeToken);
+
+        const COperand TgsmOperand(D3D11_SB_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY, GRegIndex);
+        EmitOperand(TgsmOperand);
+
+        FUNC(ByteStride);
+        FUNC(StructCount);
+        ENDINSTRUCTION();
+    }
+
+    // Emits a raw resource declaration for resource register TRegIndex.
+    void EmitRawShaderResourceViewDecl(UINT TRegIndex)
+    {
+        // Keep this declaration out of the executable-instruction count.
+        m_bExecutableInstruction = FALSE;
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_RESOURCE_RAW);
+        OPCODE(OpcodeToken);
+
+        const COperand ResourceOperand(D3D10_SB_OPERAND_TYPE_RESOURCE, TRegIndex);
+        EmitOperand(ResourceOperand);
+
+        ENDINSTRUCTION();
+    }
+
+    // Emit D3D12 byte address buffer declaration
+    // The resource operand is built from (uTable, uTableLB, uTableUB);
+    // uSpace is appended as one extra token after the operand.
+    void EmitIndexableRawShaderResourceViewDecl(UINT uTable, UINT uTableLB, UINT uTableUB, UINT uSpace)
+    {
+        // Keep this declaration out of the executable-instruction count.
+        m_bExecutableInstruction = FALSE;
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_RESOURCE_RAW);
+        OPCODE(OpcodeToken);
+
+        const COperand3D ResourceRange(D3D10_SB_OPERAND_TYPE_RESOURCE, uTable, uTableLB, uTableUB);
+        EmitOperand(ResourceRange);
+
+        FUNC(uSpace);
+        ENDINSTRUCTION();
+    }
+
+    // Emits a structured resource declaration for resource register
+    // TRegIndex; ByteStride follows the operand as its own token.
+    void EmitStructuredShaderResourceViewDecl(UINT TRegIndex, UINT ByteStride)
+    {
+        // Keep this declaration out of the executable-instruction count.
+        m_bExecutableInstruction = FALSE;
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_RESOURCE_STRUCTURED);
+        OPCODE(OpcodeToken);
+
+        const COperand ResourceOperand(D3D10_SB_OPERAND_TYPE_RESOURCE, TRegIndex);
+        EmitOperand(ResourceOperand);
+
+        FUNC(ByteStride);
+        ENDINSTRUCTION();
+    }
+
+    // Emit D3D12 structured buffer declaration
+    // Token order after the opcode: the resource operand built from
+    // (uTable, uTableLB, uTableUB), then ByteStride, then uSpace.
+    void EmitIndexableStructuredShaderResourceViewDecl(UINT uTable, UINT uTableLB, UINT uTableUB, UINT ByteStride, UINT uSpace)
+    {
+        // Keep this declaration out of the executable-instruction count.
+        m_bExecutableInstruction = FALSE;
+
+        const UINT OpcodeToken =
+            ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_RESOURCE_STRUCTURED);
+        OPCODE(OpcodeToken);
+
+        const COperand3D ResourceRange(D3D10_SB_OPERAND_TYPE_RESOURCE, uTable, uTableLB, uTableUB);
+        EmitOperand(ResourceRange);
+
+        FUNC(ByteStride);
+        FUNC(uSpace);
+        ENDINSTRUCTION();
+    }
+
+    // Emit an instruction. Custom-data is not handled by this function.
+    void EmitInstruction(const CInstruction& instruction);
+    // Emit an operand
+    void EmitOperand(const COperandBase& operand);
+    // Emit an instruction without operands: the opcode token is written and
+    // the instruction is closed immediately.
+    void Emit(UINT OpCode)
+    {
+        OPCODE(OpCode);     // write the opcode token and mark the instruction start
+        ENDINSTRUCTION();   // patch the instruction length into that token
+    }
+    // Opens an instruction that the caller fills token by token: writes the
+    // opcode token, then asks Reserve() for room for ReserveCount more tokens.
+    void StartComplexEmit(UINT OpCode,
+                          UINT ReserveCount = MAX_INSTRUCTION_LENGTH)
+    {
+        OPCODE(OpCode);
+
+        Reserve(ReserveCount);
+    }
+    // Appends one token (Data) to the instruction currently being emitted.
+    void AddComplexEmit(UINT Data)
+    {
+        FUNC(Data);     // FUNC() writes the token and grows the buffer when it fills up
+    }
+    // Closes an instruction opened with StartComplexEmit().
+    // With bPatchLength == true the length is stored in the token after the
+    // opcode; otherwise it is encoded into the opcode token itself.
+    void EndComplexEmit(bool bPatchLength = false)
+    {
+        const bool bWriteBaseLength = !bPatchLength;
+        ENDLONGINSTRUCTION(bPatchLength, bWriteBaseLength);
+    }
+    // Returns the index in m_dwFunc where the next token will be written.
+    // The value can be handed to UpdateComplexEmitPosition() later.
+    UINT GetComplexEmitPosition()
+    {
+        const UINT NextTokenIndex = m_Index;
+        return NextTokenIndex;
+    }
+    // Overwrites the token at index Pos with Data.
+    // Positions at or beyond the current buffer size are ignored silently.
+    void UpdateComplexEmitPosition(UINT Pos,
+                                   UINT Data)
+    {
+        if (Pos >= m_BufferSize)
+        {
+            return;     // outside the buffer: nothing to patch
+        }
+        m_dwFunc[Pos] = Data;
+    }
+    // Emits a custom-data block made of three parts:
+    //   token 0     : the encoded custom-data class,
+    //   token 1     : the total block size in UINTs (payload + these 2 tokens),
+    //   tokens 2... : SizeInUINTs payload UINTs copied from pCustomData.
+    // Throws E_FAIL when the size arithmetic would overflow. Reserve() may
+    // also throw while growing the buffer.
+    // pCustomData is only read when SizeInUINTs is non-zero, so a NULL pointer
+    // is acceptable for an empty payload.
+    void EmitCustomData( D3D10_SB_CUSTOMDATA_CLASS CustomDataClass,
+                        UINT SizeInUINTs /*2 UINTS will be added during encoding */,
+                        const UINT* pCustomData)
+    {
+        UINT FullSizeInUINTs = SizeInUINTs + 2; // include opcode and size
+        if( FullSizeInUINTs < SizeInUINTs || FullSizeInUINTs + m_Index < FullSizeInUINTs )   // check for overflow
+        {
+            throw E_FAIL;
+        }
+        if( m_Index + FullSizeInUINTs >= m_BufferSize ) // If custom data is going to overflow the buffer, reserve more memory
+        {
+            Reserve(FullSizeInUINTs);
+        }
+        // The block may fill the buffer exactly (Reserve() does not grow in that case),
+        // so the guarantee is "<=", not "<".  Otherwise there's a bug in Reserve()
+        __analysis_assume(m_Index + FullSizeInUINTs <= m_BufferSize);
+        m_dwFunc[m_Index++] = ENCODE_D3D10_SB_CUSTOMDATA_CLASS(CustomDataClass);
+        m_dwFunc[m_Index++] = FullSizeInUINTs;
+        if (SizeInUINTs != 0)
+        {
+            // memcpy requires valid pointers even for a zero-byte copy, so skip
+            // the call entirely for an empty payload.
+            memcpy(&m_dwFunc[m_Index],pCustomData,sizeof(UINT)*SizeInUINTs);
+        }
+        m_Index += SizeInUINTs;
+        if (m_Index >= m_BufferSize) // If custom data is exactly fully filled the buffer, reserve more memory
+        {
+            Reserve(1024);
+        }
+    }
+    // Returns number of executable instructions in the current shader
+    // (the counter is advanced by ENDINSTRUCTION()/ENDLONGINSTRUCTION()).
+    UINT GetNumExecutableInstructions()
+    {
+        return m_NumExecutableInstructions;
+    }
+protected:
+    // Starts a new instruction: stores opcode token x at the write position,
+    // records that position as the instruction start and advances the write
+    // position. Nothing is written if the write position is already outside
+    // the buffer. The buffer is grown whenever it ends up full.
+    void OPCODE(UINT x)
+    {
+        if (m_Index < m_BufferSize)
+        {
+            m_StartOpIndex = m_Index;
+            m_dwFunc[m_StartOpIndex] = x;
+            ++m_Index;
+        }
+        if (m_BufferSize <= m_Index)
+        {
+            Reserve(1024);
+        }
+    }
+    // Should be called after end of each instruction.
+    // ORs the instruction length (in tokens) into the opcode token, reserves
+    // room for the next instruction and updates the statement and
+    // executable-instruction counters. Does nothing when no token has been
+    // written since the instruction start.
+    void ENDINSTRUCTION()
+    {
+        if (m_StartOpIndex >= m_Index)
+        {
+            return;
+        }
+
+        const UINT LengthInTokens = m_Index - m_StartOpIndex;
+        m_dwFunc[m_StartOpIndex] |= ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(LengthInTokens);
+        Reserve(MAX_INSTRUCTION_LENGTH);
+
+        ++m_StatementIndex;
+        if (m_bExecutableInstruction)
+        {
+            ++m_NumExecutableInstructions;
+        }
+        // Every instruction is treated as executable unless an emitter clears
+        // the flag again before the next ENDINSTRUCTION().
+        m_bExecutableInstruction = true;
+    }
+    // Closes the current instruction, recording its length in tokens in up to
+    // two places:
+    //   bBaseLength     - OR the encoded length into the opcode token;
+    //   bExtendedLength - store the raw length in the token after the opcode.
+    // Afterwards it reserves room for the next instruction and updates the
+    // statement and executable-instruction counters. Does nothing when no
+    // token has been written since the instruction start.
+    void ENDLONGINSTRUCTION(bool bExtendedLength,
+                            bool bBaseLength = true)
+    {
+        if (m_StartOpIndex >= m_Index)
+        {
+            return;
+        }
+
+        if (bBaseLength)
+        {
+            m_dwFunc[m_StartOpIndex] |= ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(m_Index - m_StartOpIndex);
+        }
+        if (bExtendedLength)
+        {
+            // The caller is expected to have emitted at least one token after the opcode.
+            __analysis_assume(m_StartOpIndex + 1 < m_Index);
+            m_dwFunc[m_StartOpIndex + 1] = m_Index - m_StartOpIndex;
+        }
+        Reserve(MAX_INSTRUCTION_LENGTH);
+
+        ++m_StatementIndex;
+        if (m_bExecutableInstruction)
+        {
+            ++m_NumExecutableInstructions;
+        }
+        m_bExecutableInstruction = true;
+    }
+    // Appends token x at the write position and advances it. Nothing is
+    // written if the write position is already outside the buffer. The buffer
+    // is grown whenever it ends up full.
+    void FUNC(UINT x)
+    {
+        if (m_Index < m_BufferSize)
+        {
+            m_dwFunc[m_Index] = x;
+            ++m_Index;
+        }
+        if (m_BufferSize <= m_Index)
+        {
+            Reserve(1024);
+        }
+    }
+    // Prepare assembler for a new shader: rewinds the write position and the
+    // instruction start, and restarts the counters. The buffer itself and its
+    // size are left untouched.
+    void Reset()
+    {
+        // Token positions.
+        m_StartOpIndex = 0;
+        m_Index = 0;
+
+        // Counters; statement numbering starts at 1.
+        m_NumExecutableInstructions = 0;
+        m_StatementIndex = 1;
+
+        m_bExecutableInstruction = true;
+    }
+    // Reserve SizeInUINTs UINTs in the m_dwFunc array
+    // On return there is room for at least SizeInUINTs tokens from m_Index on.
+    // When the buffer has to grow, it becomes m_BufferSize + SizeInUINTs + 1024
+    // UINTs and the first m_Index tokens are carried over.
+    // Throws E_FAIL on an invalid state or size overflow, E_OUTOFMEMORY when
+    // the allocation fails.
+    void Reserve(UINT SizeInUINTs)
+    {
+        // The following overflow check may be slightly over-cautious when (m_Index + SizeInUINTs < m_BufferSize),
+        // but this only matters when m_BufferSize cannot be grown one more step without overflowing.
+        UINT NewSize = m_BufferSize + SizeInUINTs + 1024;
+        if( m_Index > m_BufferSize ||                       // invalid state
+            (m_BufferSize + SizeInUINTs) < m_BufferSize ||  // overflow with adding SizeInUINTs
+            NewSize < (m_BufferSize + SizeInUINTs) )        // overflow with adding 1024
+        {
+            throw E_FAIL;
+        }
+        if (m_BufferSize < (m_Index + SizeInUINTs))
+        {
+            // The byte count is computed in size_t. Where size_t is no wider than
+            // UINT, NewSize * sizeof(UINT) could wrap and yield an undersized
+            // allocation, so reject such sizes up front.
+            const size_t MaxElements = ((size_t)-1) / sizeof(UINT);
+            if ((size_t)NewSize > MaxElements)
+            {
+                throw E_FAIL;
+            }
+            UINT* pNewBuffer = (UINT*)malloc((size_t)NewSize*sizeof(UINT));
+            if (pNewBuffer == NULL)
+            {
+                throw E_OUTOFMEMORY;
+            }
+            // Skip the copy when there is nothing to carry over: memcpy needs
+            // valid pointers even for zero bytes, and whether m_dwFunc can be NULL
+            // at this point is not visible from here.
+            if (m_Index != 0)
+            {
+                memcpy(pNewBuffer, m_dwFunc, sizeof(UINT)*m_Index);
+            }
+            free(m_dwFunc);
+            m_dwFunc = pNewBuffer;
+            m_BufferSize = NewSize;
+        }
+    }
+    // Buffer where the binary representation is built
+    __field_ecount_part(m_BufferSize, m_Index) UINT*  m_dwFunc;
+    // Index where to place the next token in the m_dwFunc array
+    UINT    m_Index;
+    // Index of the start of the current instruction in the m_dwFunc array
+    UINT    m_StartOpIndex;
+    // Current buffer size in UINTs
+    UINT    m_BufferSize;
+    // Current statement index of the current vertex shader
+    UINT    m_StatementIndex;
+    // Number of executable instructions in the shader
+    UINT    m_NumExecutableInstructions;
+    // "true" when the current instruction is executable
+    bool    m_bExecutableInstruction;
+};
+
+//*****************************************************************************
+//
+//  CShaderCodeParser
+//
+//*****************************************************************************
+
+class CShaderCodeParser
+{
+public:
+    // Creates a parser with all token pointers set to NULL and calls
+    // InitInstructionInfo().
+    CShaderCodeParser()
+        : m_pCurrentToken(NULL)
+        , m_pShaderCode(NULL)
+        , m_pShaderEndToken(NULL)
+    {
+        InitInstructionInfo();
+    }
+
+    // Same as the default constructor, then hands pBuffer to SetShader().
+    CShaderCodeParser(CONST CShaderToken* pBuffer)
+        : m_pCurrentToken(NULL)
+        , m_pShaderCode(NULL)
+        , m_pShaderEndToken(NULL)
+    {
+        InitInstructionInfo();
+        SetShader(pBuffer);
+    }
+
+    ~CShaderCodeParser()
+    {
+    }
+
+    void SetShader(CONST CShaderToken* pBuffer);
+    void ParseInstruction(CInstruction* pInstruction);
+    void ParseIndex(COperandIndex* pOperandIndex, D3D10_SB_OPERAND_INDEX_REPRESENTATION IndexType);
+    void ParseOperand(COperandBase* pOperand);
+
+    // Non-zero once the current token has reached or passed the end token.
+    BOOL EndOfShader()
+    {
+        return m_pCurrentToken >= m_pShaderEndToken;
+    }
+
+    D3D10_SB_TOKENIZED_PROGRAM_TYPE ShaderType();
+    UINT ShaderMinorVersion();
+    UINT ShaderMajorVersion();
+    UINT ShaderLengthInTokens();
+    UINT CurrentTokenOffset();
+
+    // CurrentTokenOffset() scaled from tokens to bytes.
+    UINT CurrentTokenOffsetInBytes()
+    {
+        return CurrentTokenOffset() * sizeof(CShaderToken);
+    }
+
+    void SetCurrentTokenOffset(UINT Offset);
+
+    // Parses one operand out of a caller-supplied token range without
+    // disturbing the parser's own position: the current/end token pointers
+    // are swapped for pBuffer/pBufferEnd while ParseOperand() runs and are
+    // put back afterwards.
+    // Returns the token position ParseOperand() stopped at inside pBuffer.
+    CONST CShaderToken* ParseOperandAt(COperandBase* pOperand,
+                                       CONST CShaderToken* pBuffer,
+                                       CONST CShaderToken* pBufferEnd)
+    {
+        // Remember the parser's own position.
+        CShaderToken* const pSavedCurrent = m_pCurrentToken;
+        CShaderToken* const pSavedEnd = m_pShaderEndToken;
+
+        // Redirect the parser to the caller's range and parse.
+        m_pCurrentToken = (CShaderToken*)pBuffer;
+        m_pShaderEndToken = (CShaderToken*)pBufferEnd;
+        ParseOperand(pOperand);
+        CShaderToken* const pAfterOperand = m_pCurrentToken;
+
+        // Put the parser back where it was.
+        m_pCurrentToken = pSavedCurrent;
+        m_pShaderEndToken = pSavedEnd;
+
+        return pAfterOperand;
+    }
+
+protected:
+    // Token the parser will read next
+    CShaderToken*   m_pCurrentToken;
+    // Initialized to NULL here; assigned outside this header section
+    CShaderToken*   m_pShaderCode;
+    // End marker of the current shader: EndOfShader() reports true once
+    // m_pCurrentToken reaches or passes this pointer
+    CShaderToken*   m_pShaderEndToken;
+};
+
+}; // name space D3D10ShaderBinary

+ 35 - 0
projects/dxilconv/include/Support/DXIncludes.h

@@ -0,0 +1,35 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DXIncludes.h                                                              //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// This is a common include for DXBC/Windows related things.                 //
+//                                                                           //
+// IMPORTANT: do not add LLVM/Clang or DXIL files to this file.              //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+
+#pragma once
+
+// This is a platform-specific file.
+// Do not add LLVM/Clang or DXIL files to this file.
+
+#define NOMINMAX 1
+#define WIN32_LEAN_AND_MEAN 1
+#define VC_EXTRALEAN 1
+#include <windows.h>
+#include <strsafe.h>
+
+#include <dxgitype.h>
+#include <d3dcommon.h>
+#include <d3d11.h>
+#include <d3d12.h>
+#include "dxc/Support/d3dx12.h"
+#include "DxbcSignatures.h"
+#include <d3dcompiler.h>
+#include <wincrypt.h>
+#include <d3d12TokenizedProgramFormat.hpp>
+#include <ShaderBinary/ShaderBinary.h>

+ 141 - 0
projects/dxilconv/include/Support/DxbcSignatures.h

@@ -0,0 +1,141 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxbcSignatures.h                                                          //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Declaration of shader parameter structs in DXBC container.                //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+
+typedef D3D_NAME D3D10_NAME;
+typedef D3D_REGISTER_COMPONENT_TYPE D3D10_REGISTER_COMPONENT_TYPE;
+
+
+// One shader signature parameter as stored in the DXBC container, in the
+// variant that starts with a stream index.
+// NOTE(review): the "_FOR_GS" suffix presumably refers to geometry shader
+// output streams - confirm against the container reader.
+typedef struct _D3D11_INTERNALSHADER_PARAMETER_FOR_GS
+{
+  UINT Stream;                                    // Stream index (parameters must appear in non-decreasing stream order)
+  UINT SemanticName;                              // Offset to LPCSTR
+  UINT SemanticIndex;                             // Semantic Index
+  D3D10_NAME SystemValue;                         // Internally defined enumeration
+  D3D10_REGISTER_COMPONENT_TYPE  ComponentType;   // Component type (a D3D10_REGISTER_COMPONENT_TYPE value)
+  UINT Register;                                  // Register Index
+  BYTE Mask;                                      // Combination of D3D10_COMPONENT_MASK values
+
+  // The following unioned fields, NeverWrites_Mask and AlwaysReads_Mask, are used exclusively for
+  // output signatures and input signatures, respectively.
+  //
+  // For an output signature, NeverWrites_Mask indicates that the shader the signature belongs to never
+  // writes to the masked components of the output register.  Meaningful bits are the ones set in Mask above.
+  //
+  // For an input signature, AlwaysReads_Mask indicates that the shader the signature belongs to always
+  // reads the masked components of the input register.  Meaningful bits are the ones set in Mask above.
+  //
+  // This allows many shaders to share similar signatures even though some of them may not happen to use
+  // all of the inputs/outputs - something which may not be obvious when authored.  The NeverWrites_Mask
+  // and AlwaysReads_Mask can be checked in a debug layer at runtime for the one interesting case: that a
+  // shader that always reads a value is fed by a shader that always writes it.  Cases where shaders may
+  // or may not read values unfortunately cannot be validated.
+  //
+  // In scenarios where a signature is being passed around standalone (so it isn't tied to the input or
+  // output of a given shader), this union can be zeroed out in the absence of more information.  This
+  // effectively turns off linkage validation errors for the signature: if it is nonetheless interpreted
+  // as an input or output signature, a zero mask means "everything is always written" on output and
+  // "nothing is always read" on input.
+  union
+  {
+    BYTE NeverWrites_Mask;  // For an output signature, the shader the signature belongs to never 
+                            // writes the masked components of the output register.
+    BYTE AlwaysReads_Mask;  // For an input signature, the shader the signature belongs to always
+                            // reads the masked components of the input register.
+  };
+} D3D11_INTERNALSHADER_PARAMETER_FOR_GS, *LPD3D11_INTERNALSHADER_PARAMETER_FOR_GS;
+
+// One shader signature parameter as stored in the DXBC container, in the
+// variant that carries both a stream index and a minimum-precision field.
+typedef struct _D3D11_INTERNALSHADER_PARAMETER_11_1
+{
+  UINT Stream;                                    // Stream index (parameters must appear in non-decreasing stream order)
+  UINT SemanticName;                              // Offset to LPCSTR
+  UINT SemanticIndex;                             // Semantic Index
+  D3D10_NAME SystemValue;                         // Internally defined enumeration
+  D3D10_REGISTER_COMPONENT_TYPE  ComponentType;   // Component type (a D3D10_REGISTER_COMPONENT_TYPE value)
+  UINT Register;                                  // Register Index
+  BYTE Mask;                                      // Combination of D3D10_COMPONENT_MASK values
+
+  // The following unioned fields, NeverWrites_Mask and AlwaysReads_Mask, are used exclusively for
+  // output signatures and input signatures, respectively.
+  //
+  // For an output signature, NeverWrites_Mask indicates that the shader the signature belongs to never
+  // writes to the masked components of the output register.  Meaningful bits are the ones set in Mask above.
+  //
+  // For an input signature, AlwaysReads_Mask indicates that the shader the signature belongs to always
+  // reads the masked components of the input register.  Meaningful bits are the ones set in Mask above.
+  //
+  // This allows many shaders to share similar signatures even though some of them may not happen to use
+  // all of the inputs/outputs - something which may not be obvious when authored.  The NeverWrites_Mask
+  // and AlwaysReads_Mask can be checked in a debug layer at runtime for the one interesting case: that a
+  // shader that always reads a value is fed by a shader that always writes it.  Cases where shaders may
+  // or may not read values unfortunately cannot be validated.
+  //
+  // In scenarios where a signature is being passed around standalone (so it isn't tied to the input or
+  // output of a given shader), this union can be zeroed out in the absence of more information.  This
+  // effectively turns off linkage validation errors for the signature: if it is nonetheless interpreted
+  // as an input or output signature, a zero mask means "everything is always written" on output and
+  // "nothing is always read" on input.
+  union
+  {
+    BYTE NeverWrites_Mask;  // For an output signature, the shader the signature belongs to never 
+                            // writes the masked components of the output register.
+    BYTE AlwaysReads_Mask;  // For an input signature, the shader the signature belongs to always
+                            // reads the masked components of the input register.
+  };
+
+  D3D_MIN_PRECISION MinPrecision;                 // Minimum precision of input/output data
+} D3D11_INTERNALSHADER_PARAMETER_11_1, *LPD3D11_INTERNALSHADER_PARAMETER_11_1;
+
+
+// Header of a shader signature in the DXBC container: a parameter count plus
+// the offset at which the parameter array is stored.
+// NOTE(review): what the offset is relative to is not visible here - confirm
+// against the code that reads the container.
+typedef struct _D3D10_INTERNALSHADER_SIGNATURE
+{
+  UINT Parameters;      // Number of parameters
+  UINT ParameterInfo;   // Offset to D3D10_INTERNALSHADER_PARAMETER[Parameters]
+} D3D10_INTERNALSHADER_SIGNATURE, *LPD3D10_INTERNALSHADER_SIGNATURE;
+
+// One shader signature parameter as stored in the DXBC container, in the
+// basic variant without a stream index or minimum-precision field.
+typedef struct _D3D10_INTERNALSHADER_PARAMETER
+{
+  UINT SemanticName;                              // Offset to LPCSTR
+  UINT SemanticIndex;                             // Semantic Index
+  D3D10_NAME SystemValue;                         // Internally defined enumeration
+  D3D10_REGISTER_COMPONENT_TYPE  ComponentType;   // Component type (a D3D10_REGISTER_COMPONENT_TYPE value)
+  UINT Register;                                  // Register Index
+  BYTE Mask;                                      // Combination of D3D10_COMPONENT_MASK values
+
+  // The following unioned fields, NeverWrites_Mask and AlwaysReads_Mask, are used exclusively for
+  // output signatures and input signatures, respectively.
+  //
+  // For an output signature, NeverWrites_Mask indicates that the shader the signature belongs to never
+  // writes to the masked components of the output register.  Meaningful bits are the ones set in Mask above.
+  //
+  // For an input signature, AlwaysReads_Mask indicates that the shader the signature belongs to always
+  // reads the masked components of the input register.  Meaningful bits are the ones set in Mask above.
+  //
+  // This allows many shaders to share similar signatures even though some of them may not happen to use
+  // all of the inputs/outputs - something which may not be obvious when authored.  The NeverWrites_Mask
+  // and AlwaysReads_Mask can be checked in a debug layer at runtime for the one interesting case: that a
+  // shader that always reads a value is fed by a shader that always writes it.  Cases where shaders may
+  // or may not read values unfortunately cannot be validated.
+  //
+  // In scenarios where a signature is being passed around standalone (so it isn't tied to the input or
+  // output of a given shader), this union can be zeroed out in the absence of more information.  This
+  // effectively turns off linkage validation errors for the signature: if it is nonetheless interpreted
+  // as an input or output signature, a zero mask means "everything is always written" on output and
+  // "nothing is always read" on input.
+  union
+  {
+    BYTE NeverWrites_Mask;  // For an output signature, the shader the signature belongs to never 
+                            // writes the masked components of the output register.
+    BYTE AlwaysReads_Mask;  // For an input signature, the shader the signature belongs to always
+                            // reads the masked components of the input register.
+  };
+} D3D10_INTERNALSHADER_PARAMETER, *LPD3D10_INTERNALSHADER_PARAMETER;

+ 42 - 0
projects/dxilconv/include/Tracing/CMakeLists.txt

@@ -0,0 +1,42 @@
+# Copyright (C) Microsoft Corporation. All rights reserved.
+# This file is distributed under the University of Illinois Open Source License. See LICENSE.TXT for details.
+# Generate ETW instrumentation.
+
+# Create the header in a temporary file and only update when necessary,
+# to avoid invalidating targets that depend on it.
+#
+# mc is told (-r / -h) to write its outputs directly into
+# CMAKE_CURRENT_BINARY_DIR, and the copy step below reads and writes that same
+# directory. Each OUTPUT therefore names the file at that location: an OUTPUT
+# path that is never created makes the rule run again on every build.
+add_custom_command(
+  OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/tmpdxcruntimeetw.h
+  COMMAND mc -r ${CMAKE_CURRENT_BINARY_DIR} -h ${CMAKE_CURRENT_BINARY_DIR} -p DxcRuntimeEtw_ -um -z tmpdxcruntimeetw ${CMAKE_CURRENT_SOURCE_DIR}/DxcRuntime.man
+  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/DxcRuntime.man
+  COMMENT "Building instrumentation manifest ..."
+)
+add_custom_command(
+  OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/DxcRuntimeEtw.h
+  COMMAND ${CMAKE_COMMAND} -E copy_if_different
+        ${CMAKE_CURRENT_BINARY_DIR}/tmpdxcruntimeetw.h
+        ${CMAKE_CURRENT_BINARY_DIR}/DxcRuntimeEtw.h
+  COMMAND ${CMAKE_COMMAND} -E copy_if_different
+        ${CMAKE_CURRENT_BINARY_DIR}/tmpdxcruntimeetw.rc
+        ${CMAKE_CURRENT_BINARY_DIR}/DxcRuntimeEtw.rc
+  COMMAND ${CMAKE_COMMAND} -E copy_if_different
+        ${CMAKE_CURRENT_BINARY_DIR}/tmpdxcruntimeetwTEMP.bin
+        ${CMAKE_CURRENT_BINARY_DIR}/DxcRuntimeEtwtemp.BIN
+  COMMAND ${CMAKE_COMMAND} -E copy_if_different
+        ${CMAKE_CURRENT_BINARY_DIR}/tmpdxcruntimeetw_msg00001.bin
+        ${CMAKE_CURRENT_BINARY_DIR}/DxcRuntimeEtw_msg00001.bin
+  DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/tmpdxcruntimeetw.h
+  COMMENT "Updating instrumentation manifest ..."
+)
+
+# Mark the copied files as generated; the names match the copy destinations above.
+set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/DxcRuntimeEtw.h PROPERTIES GENERATED 1)
+set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/DxcRuntimeEtw.rc PROPERTIES GENERATED 1)
+set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/DxcRuntimeEtwtemp.BIN PROPERTIES GENERATED 1)
+set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/DxcRuntimeEtw_msg00001.bin PROPERTIES GENERATED 1)
+
+add_custom_target(DxcRuntimeEtw
+  DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/DxcRuntimeEtw.h
+  SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/DxcRuntime.man
+)
+
+# Not quite library, but close enough.
+set_target_properties(DxcRuntimeEtw PROPERTIES FOLDER "Dxilconv libraries")

+ 154 - 0
projects/dxilconv/include/Tracing/DxcRuntime.man

@@ -0,0 +1,154 @@
+<?xml version='1.0' encoding='utf-8' standalone='yes'?>
+<instrumentationManifest xmlns="http://schemas.microsoft.com/win/2004/08/events">
+  <instrumentation
+      xmlns:win="http://manifests.microsoft.com/win/2004/08/windows/events"
+      xmlns:xs="http://www.w3.org/2001/XMLSchema"
+      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+      >
+    <events xmlns="http://schemas.microsoft.com/win/2004/08/events">
+      <provider
+          guid="{af2ca688-62aa-48e9-8bf6-a0ca0cae2354}"
+          message="$(string.eventProviderName)"
+          messageFileName="%SystemRoot%\system32\dxcompilerp.dll"
+          name="Microsoft-Windows-DxcRuntime-API"
+          resourceFileName="%SystemRoot%\system32\dxcompilerp.dll"
+          symbol="MICROSOFT_WINDOWS_DXCRUNTIME_PROVIDER"
+          >
+        <channels>
+          <channel
+              chid="DxcRuntimeAnalytic"
+              name="Microsoft-Windows-DxcRuntime-API/Analytic"
+              type="Analytic"
+              />
+        </channels>
+        <tasks>
+          <task
+              name="DxcRuntimeInitialization"
+              value="1"
+              />
+          <task
+              name="DxcRuntimeShutdown"
+              value="2"
+              />
+          <task
+              name="DxcTranslate"
+              value="3"
+              >
+            <opcodes>
+              <opcode
+                  message="$(string.Task.DxcTranslate.TranslateStats)"
+                  name="TranslateStats"
+                  symbol="TranslateStats"
+                  value="100"
+                  />
+            </opcodes>
+          </task>
+        </tasks>
+        <events>
+          <event
+              channel="DxcRuntimeAnalytic"
+              level="win:Informational"
+              opcode="win:Start"
+              symbol="DxcRuntimeInitialization_Start"
+              task="DxcRuntimeInitialization"
+              value="0"
+              />
+          <event
+              channel="DxcRuntimeAnalytic"
+              level="win:Informational"
+              opcode="win:Stop"
+              symbol="DxcRuntimeInitialization_Stop"
+              task="DxcRuntimeInitialization"
+              template="OperationResultTemplate"
+              value="1"
+              />
+          <event
+              channel="DxcRuntimeAnalytic"
+              level="win:Informational"
+              opcode="win:Start"
+              symbol="DxcRuntimeShutdown_Start"
+              task="DxcRuntimeShutdown"
+              value="2"
+              />
+          <event
+              channel="DxcRuntimeAnalytic"
+              level="win:Informational"
+              opcode="win:Stop"
+              symbol="DxcRuntimeShutdown_Stop"
+              task="DxcRuntimeShutdown"
+              template="OperationResultTemplate"
+              value="3"
+              />
+          <event
+              channel="DxcRuntimeAnalytic"
+              level="win:Informational"
+              opcode="win:Start"
+              symbol="DxcTranslate_Start"
+              task="DxcTranslate"
+              value="4"
+              />
+          <event
+              channel="DxcRuntimeAnalytic"
+              level="win:Informational"
+              opcode="win:Stop"
+              symbol="DxcTranslate_Stop"
+              task="DxcTranslate"
+              template="OperationResultTemplate"
+              value="5"
+              />
+          <event
+              channel="DxcRuntimeAnalytic"
+              level="win:Informational"
+              opcode="TranslateStats"
+              symbol="DxcTranslate_TranslateStats"
+              task="DxcTranslate"
+              template="TranslateStatsTemplate"
+              value="6"
+              />
+        </events>
+        <templates>
+          <template tid="OperationResultTemplate">
+            <data
+                inType="win:Int32"
+                name="errorCode"
+                outType="win:HResult"
+                />
+          </template>
+          <template tid="TranslateStatsTemplate">
+            <data
+                inType="win:UInt32"
+                name="inputByteCount"
+                />
+            <data
+                inType="win:UInt32"
+                name="inputShaderSize"
+                />
+            <data
+                inType="win:Binary"
+                length="inputShaderSize"
+                name="inputShader"
+                />
+            <data
+                inType="win:UInt32"
+                name="outputByteCount"
+                />
+          </template>
+        </templates>
+      </provider>
+    </events>
+  </instrumentation>
+  <localization>
+    <resources culture="en-US">
+      <stringTable>
+        <string
+            id="eventProviderName"
+            value="Microsoft-Windows-DxcRuntime-API"
+            />
+        <string
+            id="Task.DxcTranslate.TranslateStats"
+            value="Translation statistics."
+            />
+      </stringTable>
+    </resources>
+  </localization>
+</instrumentationManifest>

+ 4 - 0
projects/dxilconv/lib/CMakeLists.txt

@@ -0,0 +1,4 @@
+# Add each library subdirectory under projects/dxilconv/lib to the build.
+add_subdirectory(DxilConvPasses)
+add_subdirectory(DxbcConverter)
+add_subdirectory(ShaderBinary)
+

+ 14 - 0
projects/dxilconv/lib/DxbcConverter/CMakeLists.txt

@@ -0,0 +1,14 @@
+# Build DxbcConverter.lib.
+
+# The D3D12 package is mandatory; it provides D3D12_INCLUDE_DIRS used below.
+find_package(D3D12 REQUIRED)
+
+add_dxilconv_project_library(DxbcConverter
+  DxbcConverter.cpp
+  DxbcUtil.cpp
+)
+
+# Build the intrinsics_gen and DxcRuntimeEtw targets before this library.
+add_dependencies(DxbcConverter intrinsics_gen DxcRuntimeEtw)
+
+# Make the D3D12 headers visible to the sources in this directory.
+include_directories(
+    ${D3D12_INCLUDE_DIRS}
+)

+ 7332 - 0
projects/dxilconv/lib/DxbcConverter/DxbcConverter.cpp

@@ -0,0 +1,7332 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxbcConverter.cpp                                                         //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Implements the DirectX DXBC to DXIL converter.                            //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "llvm/Support/Debug.h"
+#include "DxbcConverterImpl.h"
+#include "DxilConvPasses/DxilCleanup.h"
+#include "dxc/DxilContainer/DxilContainer.h"
+#include "dxc/DxilContainer/DxilContainerAssembler.h"
+#include "dxc/DxilContainer/DxilContainerReader.h"
+
+#define DXBCCONV_DBG   0
+
+
+namespace hlsl {
+
+// Converts a DXBC container into a DXIL container.
+//
+// This is the exception boundary around ConvertImpl: every C++ exception is
+// translated into an HRESULT by CATCH_CPP_ASSIGN_HRESULT. The call is bracketed
+// by ETW start/stop events and timed with QueryPerformanceCounter so that the
+// outcome can be reported through LogConvertResult.
+//
+//   pDxbc/DxbcSize  - input DXBC container.
+//   pExtraOptions   - optional option string (see ParseExtraOptions).
+//   ppDxil/pDxilSize- receive the converted container and its size in bytes.
+//   ppDiag          - optional; receives diagnostics.
+//
+// Returns S_OK on success or the HRESULT of the failure.
+__override HRESULT STDMETHODCALLTYPE DxbcConverter::Convert(_In_reads_bytes_(DxbcSize) LPCVOID pDxbc,
+                                               _In_ UINT32 DxbcSize,
+                                               _In_opt_z_ LPCWSTR pExtraOptions,
+                                               _Outptr_result_bytebuffer_maybenull_(*pDxilSize) LPVOID *ppDxil,
+                                               _Out_ UINT32 *pDxilSize,
+                                               _Outptr_result_maybenull_z_ LPWSTR *ppDiag) {
+    DxcThreadMalloc TM(m_pMalloc);
+    LARGE_INTEGER start, end;
+    QueryPerformanceCounter(&start);
+    DxcRuntimeEtw_DxcTranslate_Start();
+    HRESULT hr = S_OK;
+
+    // Give the outputs a defined value up front: ConvertImpl may throw before
+    // it gets a chance to clear them (e.g. when pDxbc is null).
+    if (ppDxil != nullptr)
+      *ppDxil = nullptr;
+    if (pDxilSize != nullptr)
+      *pDxilSize = 0;
+
+    try {
+      sys::fs::MSFileSystem *pFSPtr;
+      IFT(CreateMSFileSystemForDisk(&pFSPtr));
+      unique_ptr<sys::fs::MSFileSystem> pFS(pFSPtr);
+      sys::fs::AutoPerThreadSystem pTS(pFS.get());
+      IFTLLVM(pTS.error_code());
+
+      // Flushes the debug stream when the try scope is left, including on throw.
+      struct StdErrFlusher {
+        ~StdErrFlusher() { dbgs().flush(); }
+      } S;
+
+      ConvertImpl(pDxbc, DxbcSize, pExtraOptions, ppDxil, pDxilSize, ppDiag);
+
+      DxcRuntimeEtw_DxcTranslate_TranslateStats(DxbcSize, DxbcSize, (const BYTE *)pDxbc, *pDxilSize);
+      hr = S_OK;
+    }
+    CATCH_CPP_ASSIGN_HRESULT();
+    DxcRuntimeEtw_DxcTranslate_Stop(hr);
+    QueryPerformanceCounter(&end);
+
+    // ConvertImpl rejects null output pointers by throwing, so they can still
+    // be null at this point; never dereference them unconditionally.
+    LPCVOID pDxilForLog = (ppDxil != nullptr) ? *ppDxil : nullptr;
+    UINT32 DxilSizeForLog = (pDxilSize != nullptr) ? *pDxilSize : 0;
+    LogConvertResult(false, &start, &end, pDxbc, DxbcSize, pExtraOptions, pDxilForLog, DxilSizeForLog, hr);
+    return hr;
+}
+
+// Converts raw DXBC bytecode plus DDI signatures into a DXIL module blob.
+//
+// This is the exception boundary around ConvertInDriverImpl: every C++
+// exception is translated into an HRESULT by CATCH_CPP_ASSIGN_HRESULT. The call
+// is bracketed by ETW start/stop events and timed so that the outcome can be
+// reported through LogConvertResult.
+//
+//   pBytecode       - DXBC token stream; the second token holds its length in
+//                     UINT32 units.
+//   p*Signature /
+//   Num*Elements    - signature entries, forwarded as
+//                     D3D12DDIARG_SIGNATURE_ENTRY_0012 arrays.
+//   pExtraOptions   - optional option string (see ParseExtraOptions).
+//   ppDxilModule    - receives the blob holding the DXIL module.
+//   ppDiag          - optional; receives diagnostics.
+//
+// Returns S_OK on success or the HRESULT of the failure.
+__override HRESULT STDMETHODCALLTYPE DxbcConverter::ConvertInDriver(_In_reads_bytes_(8) const UINT32 *pBytecode,
+                                                       _In_opt_z_ LPCVOID pInputSignature,
+                                                       _In_ UINT32 NumInputSignatureElements,
+                                                       _In_opt_z_ LPCVOID pOutputSignature,
+                                                       _In_ UINT32 NumOutputSignatureElements,
+                                                       _In_opt_z_ LPCVOID pPatchConstantSignature,
+                                                       _In_ UINT32 NumPatchConstantSignatureElements,
+                                                       _In_opt_z_ LPCWSTR pExtraOptions,
+                                                       _Out_ IDxcBlob **ppDxilModule,
+                                                       _Outptr_result_maybenull_z_ LPWSTR *ppDiag) {
+    DxcThreadMalloc TM(m_pMalloc);
+    LARGE_INTEGER start, end;
+    QueryPerformanceCounter(&start);
+    DxcRuntimeEtw_DxcTranslate_Start();
+    HRESULT hr = S_OK;
+    // The null check on pBytecode only happens inside ConvertInDriverImpl, so
+    // the length token must not be read here unless the pointer is valid.
+    UINT32 bcSize = (pBytecode != nullptr) ? (UINT32)(pBytecode[1] * sizeof(UINT32)) : 0;
+    const BYTE *pDxilBytes = nullptr;
+    UINT32 DxilByteCount = 0;
+    try {
+      sys::fs::MSFileSystem *pFSPtr;
+      IFT(CreateMSFileSystemForDisk(&pFSPtr));
+      unique_ptr<sys::fs::MSFileSystem> pFS(pFSPtr);
+      sys::fs::AutoPerThreadSystem pTS(pFS.get());
+      IFTLLVM(pTS.error_code());
+
+      // Flushes the debug stream when the try scope is left, including on throw.
+      struct StdErrFlusher {
+        ~StdErrFlusher() { dbgs().flush(); }
+      } S;
+
+      ConvertInDriverImpl(pBytecode,
+                          (const D3D12DDIARG_SIGNATURE_ENTRY_0012 *)pInputSignature,
+                          NumInputSignatureElements,
+                          (const D3D12DDIARG_SIGNATURE_ENTRY_0012 *)pOutputSignature,
+                          NumOutputSignatureElements,
+                          (const D3D12DDIARG_SIGNATURE_ENTRY_0012 *)pPatchConstantSignature,
+                          NumPatchConstantSignatureElements,
+                          pExtraOptions,
+                          ppDxilModule,
+                          ppDiag);
+
+      // Only reached on success, so *ppDxilModule is a valid blob here.
+      pDxilBytes = (const BYTE *)(*ppDxilModule)->GetBufferPointer();
+      DxilByteCount = (*ppDxilModule)->GetBufferSize();
+      DxcRuntimeEtw_DxcTranslate_TranslateStats(bcSize, bcSize, (const BYTE *)pBytecode, DxilByteCount);
+
+      hr = S_OK;
+    }
+    CATCH_CPP_ASSIGN_HRESULT();
+    DxcRuntimeEtw_DxcTranslate_Stop(hr);
+    QueryPerformanceCounter(&end);
+    LogConvertResult(true, &start, &end, pBytecode, bcSize, pExtraOptions, pDxilBytes, DxilByteCount, hr);
+    return hr;
+}
+
+// Constructs a converter with every pointer member null, every counter zero
+// and the option flags at their defaults: hash check enabled, DXIL cleanup
+// enabled, legacy cbuffer load enabled. The depth register type starts as
+// D3D10_SB_OPERAND_TYPE_NULL.
+DxbcConverter::DxbcConverter()
+: m_dwRef(0)
+, m_pPR(nullptr)
+, m_pOP(nullptr)
+, m_pSM(nullptr)
+, m_DxbcMajor(0)
+, m_DxbcMinor(0)
+, m_pUnusedF32(nullptr)
+, m_pUnusedI32(nullptr)
+, m_NumTempRegs(0)
+, m_pIcbGV(nullptr)
+, m_bDisableHashCheck(false)
+, m_bRunDxilCleanup(true)
+, m_bLegacyCBufferLoad(true)
+, m_TGSMCount(0)
+, m_DepthRegType(D3D10_SB_OPERAND_TYPE_NULL)
+, m_bHasStencilRef(false)
+, m_bHasCoverageOut(false)
+, m_bControlPointPhase(false)
+, m_bPatchConstantPhase(false)
+, m_pInterfaceDataBuffer(nullptr)
+, m_pClassInstanceCBuffers(nullptr)
+, m_pClassInstanceSamplers(nullptr)
+, m_pClassInstanceComparisonSamplers(nullptr)
+, m_NumIfaces(0)
+, m_FcallCount(0) {
+  // Sanity-check the DXIL opcode property table (assert-only).
+  DXASSERT(OP::CheckOpCodeTable(), "incorrect entry in OpCode property table");
+}
+
+// Nothing to release explicitly here.
+DxbcConverter::~DxbcConverter() {
+}
+
+static void AddDxilPipelineStateValidationToDXBC( DxilModule *pModule,
+                                                  DxilPipelineStateValidation &PSV);
+// Appends a single-string node containing pValue to the module's "llvm.ident"
+// named metadata, creating the named metadata if it does not exist yet.
+static void EmitIdentMetadata(llvm::Module *pModule, LPCSTR pValue) {
+  llvm::LLVMContext &Context = pModule->getContext();
+
+  // One-operand node: the identification string itself.
+  llvm::Metadata *Operands[] = {llvm::MDString::get(Context, pValue)};
+  llvm::MDNode *pIdentNode = llvm::MDNode::get(Context, Operands);
+
+  pModule->getOrInsertNamedMetadata("llvm.ident")->addOperand(pIdentNode);
+}
+
+// Writes `size` bytes starting at pData to pStream.
+// Throws (via IFT) if the stream reports a failure, instead of silently
+// producing a truncated container part.
+void WritePart(AbstractMemoryStream *pStream, const void *pData, size_t size) {
+  ULONG cbWritten = 0;
+  IFT(pStream->Write(pData, size, &cbWritten));
+}
+
+// Convenience overload: writes the whole contents of Data to pStream.
+void WritePart(AbstractMemoryStream *pStream, const SmallVectorImpl<char> &Data) {
+  WritePart(pStream, Data.data(), Data.size());
+}
+
+// Converts a DXBC container to a DXIL container; reports errors by throwing.
+//
+// Steps: analyze the shader, convert its input/output/patch-constant
+// signatures, translate the instructions, emit metadata, optimize, serialize
+// the module, and finally wrap it in a container together with a pipeline
+// state validation part, the original I/O signature parts, the root signature
+// (if present) and a shader feature info part.
+//
+//   pDxbc/DxbcSize   - input container; pDxbc must not be null.
+//   pExtraOptions    - optional option string (see ParseExtraOptions).
+//   ppDxil/pDxilSize - must not be null; receive the output buffer (allocated
+//                      through CComHeapPtr) and its size in bytes.
+//   ppDiag           - optional; always set to null by this implementation.
+void DxbcConverter::ConvertImpl(_In_reads_bytes_(DxbcSize) LPCVOID pDxbc,
+                                _In_ UINT32 DxbcSize,
+                                _In_opt_z_ LPCWSTR pExtraOptions,
+                                _Outptr_result_bytebuffer_maybenull_(*pDxilSize) LPVOID *ppDxil,
+                                _Out_ UINT32 *pDxilSize,
+                                _Outptr_result_maybenull_z_ LPWSTR *ppDiag) {
+  IFTARG(pDxbc);
+  IFTARG(ppDxil);
+  IFTARG(pDxilSize);
+  *ppDxil = nullptr;
+  *pDxilSize = 0;
+  if (ppDiag)
+    *ppDiag = nullptr;
+
+  // Parse pExtraOptions.
+  ParseExtraOptions(pExtraOptions);
+
+  // Create the module.
+  m_pModule = std::make_unique<llvm::Module>("main", m_Ctx);
+
+  // Setup DxilModule.
+  m_pPR = &(m_pModule->GetOrCreateDxilModule(/*skipInit*/true));
+  m_pOP = m_pPR->GetOP();
+
+  // Open DXBC container.
+  DxilContainerReader dxbcReader;
+  IFT(dxbcReader.Load(pDxbc, DxbcSize));
+  const void *pMaxPtr = (const char *)pDxbc + DxbcSize;
+  // Rejects an empty container and an end pointer that wrapped around.
+  IFTBOOL(pDxbc < pMaxPtr, DXC_E_INCORRECT_DXBC);
+
+  // Obtain the code blob; prefer the extended shader part over the generic one.
+  UINT uCodeBlob;
+  IFT(dxbcReader.FindFirstPartKind(DXBC_GenericShaderEx, &uCodeBlob));
+  if (uCodeBlob == DXIL_CONTAINER_BLOB_NOT_FOUND) {
+      IFT(dxbcReader.FindFirstPartKind(DXBC_GenericShader, &uCodeBlob));
+  }
+  IFTBOOL(uCodeBlob != DXIL_CONTAINER_BLOB_NOT_FOUND, DXC_E_INCORRECT_DXBC);
+
+  const CShaderToken *pByteCode;
+  IFTBOOL(dxbcReader.GetPartContent(uCodeBlob, (const void **)&pByteCode) == S_OK, DXC_E_INCORRECT_DXBC);
+
+  // Parse DXBC container.
+  D3D10ShaderBinary::CShaderCodeParser Parser;
+
+  // 1. Collect information about the shader.
+  Parser.SetShader(pByteCode);
+  AnalyzeShader(Parser);
+
+  // 2. Parse input signature(s).
+  ExtractInputSignatureFromDXBC(dxbcReader, pMaxPtr);
+  ConvertSignature(*m_pInputSignature, m_pPR->GetInputSignature());
+  if (m_pSM->IsDS()) {
+    ExtractPatchConstantSignatureFromDXBC(dxbcReader, pMaxPtr);
+    ConvertSignature(*m_pPatchConstantSignature, m_pPR->GetPatchConstOrPrimSignature());
+  }
+
+  // 3. Parse output signature(s).
+  ExtractOutputSignatureFromDXBC(dxbcReader, pMaxPtr);
+  ConvertSignature(*m_pOutputSignature, m_pPR->GetOutputSignature());
+  if (m_pSM->IsHS()) {
+    ExtractPatchConstantSignatureFromDXBC(dxbcReader, pMaxPtr);
+    ConvertSignature(*m_pPatchConstantSignature, m_pPR->GetPatchConstOrPrimSignature());
+  }
+
+  // 4. Transform DXBC to DXIL (the parser is rewound to the shader start).
+  Parser.SetShader(pByteCode);
+  ConvertInstructions(Parser);
+
+  // 5. Emit metadata.
+  m_pPR->EmitDxilMetadata();
+  EmitIdentMetadata(m_pModule.get(), "dxbc2dxil 1.2");
+
+  // 6. Cleanup/Optimize DXIL.
+  Optimize();
+
+  // Serialize DXIL.
+  SmallVector<char, 4*1024> DxilBuffer;
+  SerializeDxil(DxilBuffer);
+
+  // Wrap LLVM module in a DXBC container.
+  // The writer is owned by a unique_ptr so that it is destroyed both on normal
+  // return and when any of the IFT checks below throws.
+  // Note: the part callbacks capture by value, so each holds its own copy of
+  // the data it writes.
+  size_t DXILSize = DxilBuffer.size_in_bytes();
+  unique_ptr<DxilContainerWriter> pContainerWriter(hlsl::NewDxilContainerWriter());
+  pContainerWriter->AddPart(DXBC_DXIL, DXILSize, [=](AbstractMemoryStream *pStream) {
+    WritePart(pStream, DxilBuffer);
+  });
+
+  SmallVector<char, 512> PSVBuffer; // 512 bytes is enough for 30 resources + header
+  {
+    UINT uCBuffers = m_pPR->GetCBuffers().size();
+    UINT uSamplers = m_pPR->GetSamplers().size();
+    UINT uSRVs = m_pPR->GetSRVs().size();
+    UINT uUAVs = m_pPR->GetUAVs().size();
+    UINT uTotalResources = uCBuffers + uSamplers + uSRVs + uUAVs;
+    uint32_t PSVBufferSize = 0;
+    DxilPipelineStateValidation PSV;
+    // First call (null buffer) obtains the required size; the second call
+    // initializes the PSV over the buffer resized to that size.
+    PSV.InitNew(uTotalResources, nullptr, &PSVBufferSize);
+    PSVBuffer.resize(PSVBufferSize);
+    PSV.InitNew(uTotalResources, PSVBuffer.data(), &PSVBufferSize);
+    AddDxilPipelineStateValidationToDXBC(m_pPR, PSV);
+    pContainerWriter->AddPart(DXBC_PipelineStateValidation, PSVBufferSize, [=](AbstractMemoryStream *pStream) {
+      WritePart(pStream, PSVBuffer);
+    });
+  }
+
+  UINT64 featureBody = 0;
+  { // Append original IO signatures to DXIL blob
+    DXBCFourCC IOSigFourCCArray[] = {
+      DXBC_InputSignature11_1,
+      DXBC_InputSignature,
+      DXBC_OutputSignature11_1,
+      DXBC_OutputSignature5,
+      DXBC_OutputSignature,
+      DXBC_PatchConstantSignature11_1,
+      DXBC_PatchConstantSignature
+    };
+    UINT NumSigs = sizeof(IOSigFourCCArray) / sizeof(IOSigFourCCArray[0]);
+    UINT uBlob = DXIL_CONTAINER_BLOB_NOT_FOUND;
+    UINT uElemSize = 0;
+    const void* pBlobData = nullptr;
+    for(UINT i = 0; i < NumSigs; i++) {
+      IFT(dxbcReader.FindFirstPartKind(IOSigFourCCArray[i], &uBlob));
+      if(uBlob != DXIL_CONTAINER_BLOB_NOT_FOUND) {
+        IFT(dxbcReader.GetPartContent(uBlob, &pBlobData, &uElemSize));
+        pContainerWriter->AddPart(IOSigFourCCArray[i], uElemSize, [=](AbstractMemoryStream *pStream) {
+          WritePart(pStream, pBlobData, uElemSize);
+        });
+      }
+    }
+    // Add DXBC_RootSignature and DXBC_ShaderFeatureInfo if present
+    IFT(dxbcReader.FindFirstPartKind(DXBC_RootSignature, &uBlob));
+    if(uBlob != DXIL_CONTAINER_BLOB_NOT_FOUND) {
+      IFT(dxbcReader.GetPartContent(uBlob, &pBlobData, &uElemSize));
+      pContainerWriter->AddPart(DXBC_RootSignature, uElemSize, [=](AbstractMemoryStream *pStream) {
+        WritePart(pStream, pBlobData, uElemSize);
+      });
+    }
+    IFT(dxbcReader.FindFirstPartKind(DXBC_ShaderFeatureInfo, &uBlob));
+    if(uBlob != DXIL_CONTAINER_BLOB_NOT_FOUND) {
+      IFT(dxbcReader.GetPartContent(uBlob, &pBlobData, &uElemSize));
+      pContainerWriter->AddPart(DXBC_ShaderFeatureInfo, uElemSize, [=](AbstractMemoryStream *pStream) {
+        WritePart(pStream, pBlobData, uElemSize);
+      });
+    }
+    else
+    {
+      // Add one anyway: an all-zero 64-bit feature word.
+      uElemSize = sizeof(UINT64);
+      pContainerWriter->AddPart(DXBC_ShaderFeatureInfo, uElemSize, [=](AbstractMemoryStream *pStream) {
+        WritePart(pStream, (void*)&featureBody, sizeof(featureBody));
+      });
+    }
+  }
+
+  // Serialize the container
+  UINT32 OutputSize = pContainerWriter->size();
+  CComHeapPtr<void> pOutput;
+  IFTBOOL(pOutput.AllocateBytes(OutputSize), E_OUTOFMEMORY);
+
+  CComPtr<AbstractMemoryStream> pOutputStream;
+  IFT(CreateFixedSizeMemoryStream((LPBYTE)pOutput.m_pData, OutputSize, &pOutputStream));
+  pContainerWriter->write(pOutputStream);
+  // NOTE(review): Detach() gives up the reference without releasing it, so the
+  // stream object itself appears to be leaked here - confirm whether this is
+  // intentional before changing it.
+  pOutputStream.Detach();
+
+  // Ownership of the output buffer passes to the caller.
+  *ppDxil = pOutput.Detach();
+  *pDxilSize = OutputSize;
+
+  m_pBuilder.reset();
+  m_pModule.reset();
+
+  // Diagnostics.
+  if (ppDiag)
+    *ppDiag = nullptr;
+}
+
+// Converts raw DXBC bytecode plus DDI signature arrays into a DXIL bitcode
+// blob; reports errors by throwing.
+//
+// Follows the same analyze / convert-signatures / translate / emit-metadata /
+// optimize sequence as ConvertImpl, but takes the signatures from the DDI
+// entries instead of container parts and returns the bare bitcode (copied
+// into a heap blob) rather than a container.
+//
+//   pByteCode      - token stream; must not be null. The second token is the
+//                    length in UINT32 units and must be at least 2.
+//   p*Signature /
+//   Num*Elements   - DDI signature entries for input, output and patch
+//                    constant signatures.
+//   pExtraOptions  - optional option string (see ParseExtraOptions).
+//   ppDxcBlob      - must not be null; receives the blob with the bitcode.
+//   ppDiag         - optional; always set to null by this implementation.
+void DxbcConverter::ConvertInDriverImpl(_In_reads_bytes_(8) const UINT32 *pByteCode,
+                                        _In_opt_z_ const D3D12DDIARG_SIGNATURE_ENTRY_0012 *pInputSignature,
+                                        _In_ UINT32 NumInputSignatureElements,
+                                        _In_opt_z_ const D3D12DDIARG_SIGNATURE_ENTRY_0012 *pOutputSignature,
+                                        _In_ UINT32 NumOutputSignatureElements,
+                                        _In_opt_z_ const D3D12DDIARG_SIGNATURE_ENTRY_0012 *pPatchConstantSignature,
+                                        _In_ UINT32 NumPatchConstantSignatureElements,
+                                        _In_opt_z_ LPCWSTR pExtraOptions,
+                                        _Out_ IDxcBlob **ppDxcBlob,
+                                        _Outptr_result_maybenull_z_ LPWSTR *ppDiag) {
+  IFTARG(pByteCode);
+  IFTARG(ppDxcBlob);
+  // The stream must contain at least the two leading tokens read here.
+  UINT SizeInUINTs = pByteCode[1];
+  IFTBOOL(SizeInUINTs >= 2, DXC_E_ERROR_PARSING_DXBC_BYTECODE);
+  *ppDxcBlob = nullptr;
+  if (ppDiag)
+    *ppDiag = nullptr;
+
+  // Parse pExtraOptions.
+  ParseExtraOptions(pExtraOptions);
+
+  // Create the module.
+  m_pModule = std::make_unique<llvm::Module>("main", m_Ctx);
+
+  // Setup DxilModule.
+  m_pPR = &(m_pModule->GetOrCreateDxilModule(/*skipInit*/true));
+  m_pOP = m_pPR->GetOP();
+
+  // Parse DXBC bytecode.
+  D3D10ShaderBinary::CShaderCodeParser Parser;
+
+  // 1. Collect information about the shader.
+  Parser.SetShader(pByteCode);
+  AnalyzeShader(Parser);
+
+  // 2. Parse input signature(s).
+  ExtractSignatureFromDDI(pInputSignature, NumInputSignatureElements, *m_pInputSignature);
+  ConvertSignature(*m_pInputSignature, m_pPR->GetInputSignature());
+  if (m_pSM->IsDS()) {
+    ExtractSignatureFromDDI(pPatchConstantSignature, NumPatchConstantSignatureElements, *m_pPatchConstantSignature);
+    ConvertSignature(*m_pPatchConstantSignature, m_pPR->GetPatchConstOrPrimSignature());
+  }
+
+  // 3. Parse output signature(s).
+  ExtractSignatureFromDDI(pOutputSignature, NumOutputSignatureElements, *m_pOutputSignature);
+  ConvertSignature(*m_pOutputSignature, m_pPR->GetOutputSignature());
+  if (m_pSM->IsHS()) {
+    ExtractSignatureFromDDI(pPatchConstantSignature, NumPatchConstantSignatureElements, *m_pPatchConstantSignature);
+    ConvertSignature(*m_pPatchConstantSignature, m_pPR->GetPatchConstOrPrimSignature());
+  }
+
+  // 4. Transform DXBC to DXIL (the parser is rewound to the shader start).
+  Parser.SetShader(pByteCode);
+  ConvertInstructions(Parser);
+
+  // 5. Emit metadata.
+  m_pPR->EmitDxilMetadata();
+
+  // 6. Cleanup/Optimize DXIL.
+  Optimize();
+
+  // Serialize DXIL.
+  SmallVector<char, 8*1024> DxilBuffer;
+  raw_svector_ostream DxilStream(DxilBuffer);
+  WriteBitcodeToFile(m_pModule.get(), DxilStream);
+  DxilStream.flush();
+
+  // The blob gets its own copy, so DxilBuffer may go out of scope afterwards.
+  IFT(DxcCreateBlobOnHeapCopy(DxilBuffer.data(), DxilBuffer.size_in_bytes(), ppDxcBlob));
+
+  m_pBuilder.reset();
+  m_pModule.reset();
+
+  // Diagnostics.
+  if (ppDiag)
+    *ppDiag = nullptr;
+}
+
+// Applies the options found in pExtraOptions to the converter's flags.
+// Options are detected by plain substring search (temporary implementation):
+//   -disableHashCheck  sets m_bDisableHashCheck.
+//   -no-dxil-cleanup   clears m_bRunDxilCleanup.
+// A null string leaves all flags untouched.
+void DxbcConverter::ParseExtraOptions(const wchar_t *pExtraOptions) {
+  if (!pExtraOptions)
+    return;
+
+  const wstring Options(pExtraOptions);
+  auto HasOption = [&Options](const wchar_t *pName) {
+    return Options.find(pName) != wstring::npos;
+  };
+
+  if (HasOption(L"-disableHashCheck")) {
+    m_bDisableHashCheck = true;
+  }
+
+  // Opt out from DXIL cleanup pass.
+  if (HasOption(L"-no-dxil-cleanup")) {
+    m_bRunDxilCleanup = false;
+  }
+}
+
+// Translates the DXBC global-flags word into the DXIL module's shader flags.
+// Each flag is decoded by the matching DXBC::IsFlag* helper; the trailing
+// comment on every line names the DXBC flag it corresponds to.
+void DxbcConverter::SetShaderGlobalFlags(unsigned GlobalFlags) {
+  // GlobalFlags takes the set of flags defined for D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS:
+  m_pPR->m_ShaderFlags.SetDisableOptimizations          (DXBC::IsFlagDisableOptimizations         (GlobalFlags)); // ~D3D11_1_SB_GLOBAL_FLAG_SKIP_OPTIMIZATION
+  m_pPR->m_ShaderFlags.SetDisableMathRefactoring        (DXBC::IsFlagDisableMathRefactoring       (GlobalFlags)); // ~D3D10_SB_GLOBAL_FLAG_REFACTORING_ALLOWED
+  m_pPR->m_ShaderFlags.SetEnableDoublePrecision         (DXBC::IsFlagEnableDoublePrecision        (GlobalFlags)); // D3D11_SB_GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS
+  m_pPR->m_ShaderFlags.SetForceEarlyDepthStencil        (DXBC::IsFlagForceEarlyDepthStencil       (GlobalFlags)); // D3D11_SB_GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL
+  m_pPR->m_ShaderFlags.SetLowPrecisionPresent           (DXBC::IsFlagEnableMinPrecision           (GlobalFlags)); // D3D11_1_SB_GLOBAL_FLAG_ENABLE_MINIMUM_PRECISION
+  m_pPR->m_ShaderFlags.SetEnableDoubleExtensions        (DXBC::IsFlagEnableDoubleExtensions       (GlobalFlags)); // D3D11_1_SB_GLOBAL_FLAG_ENABLE_DOUBLE_EXTENSIONS
+  m_pPR->m_ShaderFlags.SetEnableMSAD                    (DXBC::IsFlagEnableMSAD                   (GlobalFlags)); // D3D11_1_SB_GLOBAL_FLAG_ENABLE_SHADER_EXTENSIONS
+  // The all-resources-bound flag is only honored for shader model 5.1 and later.
+  if (IsSM51Plus()) {
+    m_pPR->m_ShaderFlags.SetAllResourcesBound           (DXBC::IsFlagAllResourcesBound            (GlobalFlags)); // D3D12_SB_GLOBAL_FLAG_ALL_RESOURCES_BOUND
+  }
+  m_pPR->m_ShaderFlags.SetEnableRawAndStructuredBuffers (DXBC::IsFlagEnableRawAndStructuredBuffers(GlobalFlags)); // D3D11_SB_GLOBAL_FLAG_ENABLE_RAW_AND_STRUCTURED_BUFFERS
+}
+
+// Locates the input signature part of the DXBC container and loads its
+// elements into m_pInputSignature.
+//
+// The 11.1 layout is preferred; the original layout is used as a fallback.
+// The element size passed on tells ExtractSignatureFromDXBC which record
+// layout the part uses. Throws DXC_E_INCORRECT_DXBC if neither part exists.
+//
+//   dxbcReader - reader over the loaded container.
+//   pMaxPtr    - one past the last byte of the container, for bounds checks.
+void DxbcConverter::ExtractInputSignatureFromDXBC(DxilContainerReader &dxbcReader, const void *pMaxPtr) {
+  // Obtain the input signature blob.
+  UINT uBlob;
+  IFT(dxbcReader.FindFirstPartKind(DXBC_InputSignature11_1, &uBlob));
+  UINT uElemSize = sizeof(D3D11_INTERNALSHADER_PARAMETER_11_1);
+
+  if (uBlob == DXIL_CONTAINER_BLOB_NOT_FOUND) {
+    IFT(dxbcReader.FindFirstPartKind(DXBC_InputSignature, &uBlob));
+    uElemSize = sizeof(D3D10_INTERNALSHADER_PARAMETER);
+  }
+  IFTBOOL(uBlob != DXIL_CONTAINER_BLOB_NOT_FOUND, DXC_E_INCORRECT_DXBC);
+
+  // Parse signature elements.
+  const D3D10_INTERNALSHADER_SIGNATURE *pSig;
+  IFT(dxbcReader.GetPartContent(uBlob, (const void**)&pSig));
+  ExtractSignatureFromDXBC(pSig, uElemSize, pMaxPtr, *m_pInputSignature);
+}
+
+// Locates the output signature part of the DXBC container and loads its
+// elements into m_pOutputSignature. Throws DXC_E_INCORRECT_DXBC when no
+// output signature part is present.
+//
+//   dxbcReader - reader over the loaded container.
+//   pMaxPtr    - one past the last byte of the container, for bounds checks.
+void DxbcConverter::ExtractOutputSignatureFromDXBC(DxilContainerReader &dxbcReader, const void *pMaxPtr) {
+  // Candidate part kinds in lookup order, each paired with the size of one
+  // serialized element record in that layout.
+  const struct {
+    DXBCFourCC Kind;
+    UINT RecordSize;
+  } Layouts[] = {
+    { DXBC_OutputSignature11_1, (UINT)sizeof(D3D11_INTERNALSHADER_PARAMETER_11_1) },
+    { DXBC_OutputSignature5,    (UINT)sizeof(D3D11_INTERNALSHADER_PARAMETER_FOR_GS) },
+    { DXBC_OutputSignature,     (UINT)sizeof(D3D10_INTERNALSHADER_PARAMETER) },
+  };
+
+  // Take the first layout whose part exists in the container.
+  UINT uPart = DXIL_CONTAINER_BLOB_NOT_FOUND;
+  UINT uRecordSize = 0;
+  for (const auto &Layout : Layouts) {
+    IFT(dxbcReader.FindFirstPartKind(Layout.Kind, &uPart));
+    uRecordSize = Layout.RecordSize;
+    if (uPart != DXIL_CONTAINER_BLOB_NOT_FOUND)
+      break;
+  }
+  IFTBOOL(uPart != DXIL_CONTAINER_BLOB_NOT_FOUND, DXC_E_INCORRECT_DXBC);
+
+  // Hand the raw signature over for element-by-element parsing.
+  const D3D10_INTERNALSHADER_SIGNATURE *pSignature;
+  IFT(dxbcReader.GetPartContent(uPart, (const void**)&pSignature));
+  ExtractSignatureFromDXBC(pSignature, uRecordSize, pMaxPtr, *m_pOutputSignature);
+}
+
+// Locates the patch-constant signature part of the DXBC container and loads
+// its elements into m_pPatchConstantSignature. Throws DXC_E_INCORRECT_DXBC
+// when no patch-constant signature part is present.
+//
+//   dxbcReader - reader over the loaded container.
+//   pMaxPtr    - one past the last byte of the container, for bounds checks.
+void DxbcConverter::ExtractPatchConstantSignatureFromDXBC(DxilContainerReader &dxbcReader, const void *pMaxPtr) {
+  // Candidate part kinds in lookup order, each paired with the size of one
+  // serialized element record in that layout.
+  const struct {
+    DXBCFourCC Kind;
+    UINT RecordSize;
+  } Layouts[] = {
+    { DXBC_PatchConstantSignature11_1, (UINT)sizeof(D3D11_INTERNALSHADER_PARAMETER_11_1) },
+    { DXBC_PatchConstantSignature,     (UINT)sizeof(D3D10_INTERNALSHADER_PARAMETER) },
+  };
+
+  // Take the first layout whose part exists in the container.
+  UINT uPart = DXIL_CONTAINER_BLOB_NOT_FOUND;
+  UINT uRecordSize = 0;
+  for (const auto &Layout : Layouts) {
+    IFT(dxbcReader.FindFirstPartKind(Layout.Kind, &uPart));
+    uRecordSize = Layout.RecordSize;
+    if (uPart != DXIL_CONTAINER_BLOB_NOT_FOUND)
+      break;
+  }
+  IFTBOOL(uPart != DXIL_CONTAINER_BLOB_NOT_FOUND, DXC_E_INCORRECT_DXBC);
+
+  // Hand the raw signature over for element-by-element parsing.
+  const D3D10_INTERNALSHADER_SIGNATURE *pSignature;
+  IFT(dxbcReader.GetPartContent(uPart, (const void**)&pSignature));
+  ExtractSignatureFromDXBC(pSignature, uRecordSize, pMaxPtr, *m_pPatchConstantSignature);
+}
+
+// Parses a serialized DXBC signature and appends one ElementRecord per
+// parameter to SigHelper.
+//
+//   pSig      - start of the signature part (header followed, at offset
+//               ParameterInfo, by `Parameters` element records).
+//   uElemSize - size of one element record; selects the record layout
+//               (11.1, GS, or original D3D10). Any other size is rejected.
+//   pMaxPtr   - one past the last byte of the container; every access is
+//               validated against it.
+//   SigHelper - receives the parsed elements.
+//
+// Throws DXC_E_INCORRECT_DXBC when the header or the parameter array does not
+// fit in the container, or when uElemSize is not a known record size.
+void DxbcConverter::ExtractSignatureFromDXBC(const D3D10_INTERNALSHADER_SIGNATURE *pSig, 
+                                             UINT uElemSize, const void *pMaxPtr, 
+                                             SignatureHelper &SigHelper) {
+  // Verify the signature header is within the blob.
+  const char *pSigBase = (const char *)pSig;
+  const char *pEnd = (const char *)pMaxPtr;
+  IFTBOOL(pSigBase != nullptr && pSigBase < pEnd &&
+          (UINT64)(pEnd - pSigBase) >= sizeof(D3D10_INTERNALSHADER_SIGNATURE),
+          DXC_E_INCORRECT_DXBC);
+
+  // Verify the parameter array is within the blob. The array size is computed
+  // in 64 bits and compared against the remaining byte count: a 32-bit
+  // Parameters * uElemSize product can wrap around for a crafted parameter
+  // count and slip past a pointer-based check.
+  const UINT64 cbAvailable = (UINT64)(pEnd - pSigBase);
+  const UINT64 cbParamOffset = (UINT64)pSig->ParameterInfo;
+  const UINT64 cbParams = (UINT64)pSig->Parameters * (UINT64)uElemSize;
+  IFTBOOL(cbParamOffset <= cbAvailable && cbParams <= cbAvailable - cbParamOffset,
+          DXC_E_INCORRECT_DXBC);
+
+  unsigned uParamCount = pSig->Parameters;
+  const char *pParamBase = pSigBase + pSig->ParameterInfo;
+
+  // This is to test in-driver conversion.
+#define TestDDISignature 0
+#if TestDDISignature
+  vector<D3D12DDIARG_SIGNATURE_ENTRY_0012> TestDDI;
+  TestDDI.resize(uParamCount);
+  memset(TestDDI.data(), 0, TestDDI.size()*sizeof(D3D12DDIARG_SIGNATURE_ENTRY_0012));
+
+  unsigned EdgeTess = 0, InsideEdgeTess = 0;
+#endif
+
+  for (unsigned iElement = 0; iElement < uParamCount; iElement++) {
+    D3D11_INTERNALSHADER_PARAMETER_11_1 P = {0};
+    // Properly copy parameters for the serialized form into P.
+    // Fields the smaller layouts do not carry stay zero-initialized.
+    switch (uElemSize) {
+    case sizeof(D3D11_INTERNALSHADER_PARAMETER_11_1):
+      memcpy(&P, pParamBase + iElement*uElemSize, uElemSize);
+      break;
+    case sizeof(D3D11_INTERNALSHADER_PARAMETER_FOR_GS):
+      memcpy(&P, pParamBase + iElement*uElemSize, uElemSize);
+      break;
+    case sizeof(D3D10_INTERNALSHADER_PARAMETER):
+      // The D3D10 record matches the GS record from SemanticName onwards
+      // (checked by the static_assert), so it is copied in at that field.
+      static_assert(sizeof(D3D11_INTERNALSHADER_PARAMETER_FOR_GS) == 
+                    sizeof(D3D10_INTERNALSHADER_PARAMETER) + FIELD_OFFSET(D3D11_INTERNALSHADER_PARAMETER_FOR_GS, SemanticName),
+                    "Incorrect assumptions about field offset");
+      memcpy(&P.SemanticName, pParamBase + iElement*uElemSize, uElemSize);
+      break;
+    default:
+      IFT(DXC_E_INCORRECT_DXBC);
+    }
+
+    // Extract data from the blob.
+    SignatureHelper::ElementRecord E;
+    // Existing tests use testasm to create shaders with incorrect semantic names.
+    // The converter is compensating for this.
+    if (P.SystemValue == D3D_NAME_UNDEFINED) {
+      // Retrieve name from the signature blob.
+      CheckDxbcString(pSigBase + P.SemanticName, pMaxPtr);
+      E.SemanticName  = string(pSigBase + P.SemanticName);
+    } else {
+      // Recover canonical SV_ name.
+      E.SemanticName  = string(DXBC::GetSemanticNameFromD3DName(P.SystemValue));
+    }
+    // UINT_MAX means the system value does not dictate a semantic index.
+    unsigned SemanticIndex  = DXBC::GetSemanticIndexFromD3DName(P.SystemValue);
+    E.SemanticIndex         = (SemanticIndex == UINT_MAX) ? P.SemanticIndex : SemanticIndex;
+    E.StartRow              = P.Register;
+    E.StartCol              = CMask(P.Mask).GetFirstActiveComp();
+    E.Rows                  = 1;
+    E.Cols                  = CMask(P.Mask).GetNumActiveRangeComps();
+    E.Stream                = P.Stream;
+    E.ComponentType         = DXBC::GetCompTypeWithMinPrec(P.ComponentType, (D3D11_SB_OPERAND_MIN_PRECISION)P.MinPrecision);
+
+#if TestDDISignature
+    D3D12DDIARG_SIGNATURE_ENTRY_0012 &D = TestDDI[iElement];
+    D.Register = P.Register;
+    D.Mask = P.Mask;
+    D.Stream = P.Stream;
+    D.RegisterComponentType = (D3D10_SB_REGISTER_COMPONENT_TYPE)P.ComponentType;
+    D.MinPrecision = (D3D11_SB_OPERAND_MIN_PRECISION)P.MinPrecision;
+
+    switch (P.SystemValue) {
+    case D3D_NAME_UNDEFINED:                      D.SystemValue = D3D10_SB_NAME_UNDEFINED; break;
+    case D3D_NAME_POSITION:                       D.SystemValue = D3D10_SB_NAME_POSITION; break;
+    case D3D_NAME_CLIP_DISTANCE:                  D.SystemValue = D3D10_SB_NAME_CLIP_DISTANCE; break;
+    case D3D_NAME_CULL_DISTANCE:                  D.SystemValue = D3D10_SB_NAME_CULL_DISTANCE; break;
+    case D3D_NAME_RENDER_TARGET_ARRAY_INDEX:      D.SystemValue = D3D10_SB_NAME_RENDER_TARGET_ARRAY_INDEX; break;
+    case D3D_NAME_VIEWPORT_ARRAY_INDEX:           D.SystemValue = D3D10_SB_NAME_VIEWPORT_ARRAY_INDEX; break;
+    case D3D_NAME_VERTEX_ID:                      D.SystemValue = D3D10_SB_NAME_VERTEX_ID; break;
+    case D3D_NAME_PRIMITIVE_ID:                   D.SystemValue = D3D10_SB_NAME_PRIMITIVE_ID; break;
+    case D3D_NAME_INSTANCE_ID:                    D.SystemValue = D3D10_SB_NAME_INSTANCE_ID; break;
+    case D3D_NAME_IS_FRONT_FACE:                  D.SystemValue = D3D10_SB_NAME_IS_FRONT_FACE; break;
+    case D3D_NAME_SAMPLE_INDEX:                   D.SystemValue = D3D10_SB_NAME_SAMPLE_INDEX; break;
+    case D3D_NAME_FINAL_QUAD_EDGE_TESSFACTOR:
+      switch (EdgeTess) {
+      case 0: D.SystemValue = D3D11_SB_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR; break;
+      case 1: D.SystemValue = D3D11_SB_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR; break;
+      case 2: D.SystemValue = D3D11_SB_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR; break;
+      case 3: D.SystemValue = D3D11_SB_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR; break;
+      default:
+        DXASSERT_NOMSG(false);
+      }
+      EdgeTess++;
+      break;
+    case D3D_NAME_FINAL_QUAD_INSIDE_TESSFACTOR:
+      switch (InsideEdgeTess) {
+      case 0: D.SystemValue = D3D11_SB_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR; break;
+      case 1: D.SystemValue = D3D11_SB_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR; break;
+      default:
+        DXASSERT_NOMSG(false);
+      }
+      InsideEdgeTess++;
+      break;
+    case D3D_NAME_FINAL_TRI_EDGE_TESSFACTOR:
+      switch (EdgeTess) {
+      case 0: D.SystemValue = D3D11_SB_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR; break;
+      case 1: D.SystemValue = D3D11_SB_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR; break;
+      case 2: D.SystemValue = D3D11_SB_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR; break;
+      default:
+        DXASSERT_NOMSG(false);
+      }
+      EdgeTess++;
+      break;
+    case D3D_NAME_FINAL_TRI_INSIDE_TESSFACTOR:    D.SystemValue = D3D11_SB_NAME_FINAL_TRI_INSIDE_TESSFACTOR; break;
+    case D3D_NAME_FINAL_LINE_DETAIL_TESSFACTOR:   D.SystemValue = D3D11_SB_NAME_FINAL_LINE_DETAIL_TESSFACTOR; break;
+    case D3D_NAME_FINAL_LINE_DENSITY_TESSFACTOR:  D.SystemValue = D3D11_SB_NAME_FINAL_LINE_DENSITY_TESSFACTOR; break;
+    case D3D_NAME_TARGET:
+    case D3D_NAME_DEPTH:
+    case D3D_NAME_COVERAGE:
+    case D3D_NAME_DEPTH_GREATER_EQUAL:
+    case D3D_NAME_DEPTH_LESS_EQUAL:
+    case D3D_NAME_STENCIL_REF:
+    case D3D_NAME_INNER_COVERAGE:                 D.SystemValue = D3D10_SB_NAME_UNDEFINED; break;
+    default:
+      DXASSERT_NOMSG(false);
+    }
+#else
+    SigHelper.m_ElementRecords.emplace_back(E);
+#endif
+  }
+
+#if TestDDISignature
+  ExtractSignatureFromDDI(TestDDI.data(), (unsigned)TestDDI.size(), SigHelper);
+#endif
+}
+
+// Builds signature element records from an array of DDI signature entries and
+// appends them to SigHelper.
+//
+// DDI entries carry no semantic names, so names are reconstructed:
+//  - entries with a system value get the name/index returned by
+//    DXBC::GetD3D10SBName / DXBC::GetD3D10SBSemanticIndex;
+//  - pixel shader outputs without a system value are disambiguated from the
+//    register index, the component type and what the shader analysis recorded
+//    (m_bHasStencilRef, m_bHasCoverageOut, m_DepthRegType);
+//  - everything else gets a generated name: "_in", "_out" or "_pc" followed by
+//    a running counter.
+//
+//   pElements   - DDI entries; may be null only when NumElements is 0.
+//   NumElements - number of entries in pElements.
+//   SigHelper   - receives the element records.
+//
+// Throws DXC_E_INCORRECT_DDI_SIGNATURE when an entry cannot be interpreted.
+void DxbcConverter::ExtractSignatureFromDDI(const D3D12DDIARG_SIGNATURE_ENTRY_0012 *pElements,
+                                            unsigned NumElements,
+                                            SignatureHelper &SigHelper) {
+  // The signature pointers are optional at the API boundary; fail cleanly
+  // rather than dereferencing null when a count is supplied without data.
+  IFTBOOL(pElements != nullptr || NumElements == 0, DXC_E_INCORRECT_DDI_SIGNATURE);
+
+  string NamePrefix;
+  if (SigHelper.IsInput())
+    NamePrefix = "_in";
+  else if (SigHelper.IsOutput())
+    NamePrefix = "_out";
+  else
+    NamePrefix = "_pc";
+
+  unsigned iArbitrarySemantic = 0;
+  for (unsigned iElement = 0; iElement < NumElements; iElement++) {
+    const D3D12DDIARG_SIGNATURE_ENTRY_0012 &P = pElements[iElement];
+
+    // Extract data from DDI signature element record.
+    SignatureHelper::ElementRecord E;
+
+    E.StartRow              = P.Register;
+    E.StartCol              = CMask(P.Mask).GetFirstActiveComp();
+    E.Rows                  = 1;
+    E.Cols                  = CMask(P.Mask).GetNumActiveRangeComps();
+    E.Stream                = P.Stream;
+
+    if (P.SystemValue == D3D10_SB_NAME_UNDEFINED) {
+      E.ComponentType = DXBC::GetCompTypeWithMinPrec((D3D_REGISTER_COMPONENT_TYPE)P.RegisterComponentType, (D3D11_SB_OPERAND_MIN_PRECISION)P.MinPrecision);
+
+      // For PS output, try to disambiguate semantic based on register index.
+      if (m_pSM->IsPS() && SigHelper.IsOutput()) {
+        if (P.Register != -1) {
+          // This must be SV_Target.
+          E.SemanticName = "SV_Target";
+          E.SemanticIndex = P.Register;
+        } else {
+          // Register == -1: an output without a numbered register.
+          E.SemanticIndex = P.Register;
+          switch (P.RegisterComponentType) {
+          case D3D10_SB_REGISTER_COMPONENT_UINT32:
+          case D3D10_SB_REGISTER_COMPONENT_SINT32: {
+            // Integer output: SV_StencilRef if the shader declares it,
+            // otherwise SV_Coverage; anything else is an invalid signature.
+            if (m_bHasStencilRef) {
+              E.SemanticName = "SV_StencilRef";
+            } else if (m_bHasCoverageOut) {
+              E.SemanticName = "SV_Coverage";
+            } else {
+              IFTBOOL(false, DXC_E_INCORRECT_DDI_SIGNATURE);
+            }
+            break;
+          }
+          case D3D10_SB_REGISTER_COMPONENT_FLOAT32: {
+            // This must be SV_Depth*.
+            switch (m_DepthRegType) {
+            case D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH:
+              E.SemanticName = "SV_Depth";
+              break;
+            case D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL:
+              E.SemanticName = "SV_DepthGreaterEqual";
+              break;
+            case D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL:
+              E.SemanticName = "SV_DepthLessEqual";
+              break;
+            case D3D10_SB_OPERAND_TYPE_NULL:
+            default:
+              IFT(DXC_E_INCORRECT_DDI_SIGNATURE);
+            }
+            break;
+          }
+          default:
+            IFT(DXC_E_INCORRECT_DDI_SIGNATURE);
+          }
+        }
+      } else {
+        // Arbitrary semantic.
+        E.SemanticName  = NamePrefix + std::to_string(iArbitrarySemantic++);
+        E.SemanticIndex = iElement;
+      }
+    } else {
+      E.SemanticName  = string(DXBC::GetD3D10SBName(P.SystemValue));
+      E.SemanticIndex = DXBC::GetD3D10SBSemanticIndex(P.SystemValue);
+      // Prefer the component type supplied with the entry; fall back to the
+      // one implied by the system value.
+      if (P.RegisterComponentType != D3D_REGISTER_COMPONENT_UNKNOWN) {
+        E.ComponentType = DXBC::GetCompTypeWithMinPrec((D3D_REGISTER_COMPONENT_TYPE)P.RegisterComponentType, (D3D11_SB_OPERAND_MIN_PRECISION)P.MinPrecision);
+      } else {
+        E.ComponentType = DXBC::GetD3DRegCompType(P.SystemValue);
+      }
+    }
+
+    // This would happen if component type is not supplied by the runtime.
+    IFTBOOL(!E.ComponentType.IsInvalid(), DXC_E_INCORRECT_DDI_SIGNATURE);
+    
+    SigHelper.m_ElementRecords.emplace_back(E);
+  }
+}
+
+/// Converts the records collected in \p SigHelper into DXIL signature elements.
+///
+/// The work is done in the following order:
+///   1. Sort SigHelper.m_UsedElements so they can be binary-searched.
+///   2. Tighten the columns of every declared index range to the signature
+///      elements that overlap it, then merge ranges that overlap each other
+///      (only ranges of the same output stream are merged).
+///   3. Create one DxilSignatureElement per element record. Records that fall
+///      inside an index range are folded into a single multi-row element.
+///   4. For PS inputs, add SV_Coverage / SV_InnerCoverage when flagged.
+///   5. Fill SigHelper's DXBC register -> element and operand type -> element maps.
+///   6. Append clones of the elements to \p DxilSig.
+///
+/// \param SigHelper  signature helper; its m_UsedElements, m_Ranges,
+///                   m_Signature and lookup maps are modified.
+/// \param DxilSig    destination signature that receives the cloned elements.
+void DxbcConverter::ConvertSignature(SignatureHelper &SigHelper, DxilSignature &DxilSig) {
+  // Sort SigHelper.m_UsedElements for upcoming binary search.
+  std::sort(SigHelper.m_UsedElements.begin(), SigHelper.m_UsedElements.end(), SignatureHelper::UsedElement::LTByStreamAndStartRowAndStartCol());
+
+  if (!SigHelper.m_Ranges.empty()) {
+    // Adjust range columns to tightly include components of signature elements.
+    // A range that no element overlaps ends up with StartCol == UINT32_MAX and Cols == 1.
+    for (size_t iRange = 0; iRange < SigHelper.m_Ranges.size(); iRange++) {
+      SignatureHelper::Range &R = SigHelper.m_Ranges[iRange];
+      unsigned RangeStartCol = UINT32_MAX;
+      unsigned RangeEndCol = UINT32_MAX;
+
+      for (size_t iElement = 0; iElement < SigHelper.m_ElementRecords.size(); iElement++) {
+        const SignatureHelper::ElementRecord &SigElem = SigHelper.m_ElementRecords[iElement];
+        unsigned StartRow = SigElem.StartRow;
+        unsigned StartCol = SigElem.StartCol;
+        unsigned Rows     = SigElem.Rows; DXASSERT_NOMSG(Rows == 1);
+        unsigned Cols     = SigElem.Cols;
+        unsigned Stream   = SigElem.Stream;
+
+        if (R.OutputStream != Stream)
+          continue;
+
+        if (R.StartRow <= StartRow  &&  StartRow < R.StartRow+R.Rows) {
+          if (!(StartCol+Cols-1 < R.GetStartCol() || R.GetEndCol() < StartCol)) {
+            // Signature element overlaps with the declared range.
+            if (RangeStartCol != UINT32_MAX) {
+              RangeStartCol = std::min(RangeStartCol, StartCol);
+              RangeEndCol = std::max(RangeEndCol, StartCol+Cols-1);
+            } else {
+              RangeStartCol = StartCol;
+              RangeEndCol = StartCol+Cols-1;
+            }
+          }
+        }
+      }
+      R.StartCol = RangeStartCol;
+      R.Cols = RangeEndCol - RangeStartCol + 1;
+    }
+
+    // Coalesce declaration ranges if they overlap.
+    // The ranges are compacted in place: [0, iLastEntryIndex] holds the result so far.
+    std::sort(SigHelper.m_Ranges.begin(), SigHelper.m_Ranges.end(), SignatureHelper::Range::LTRangeByStreamAndStartRowAndStartCol());
+    unsigned iLastEntryIndex = 0;
+    for (size_t i = 1; i < SigHelper.m_Ranges.size(); i++) {
+      // Current range into which we try to coalesce.
+      SignatureHelper::Range &R1 = SigHelper.m_Ranges[iLastEntryIndex];
+      // A range that is a candidate for coalescing.
+      const SignatureHelper::Range &R2 = SigHelper.m_Ranges[i];
+      // Ranges of different output streams never describe the same registers,
+      // so they must not be merged; the row ordering produced by the sort is
+      // also only meaningful within one stream.
+      const bool bSameStream = R1.OutputStream == R2.OutputStream;
+      // Do R1 and R2 overlap?
+      DXASSERT_NOMSG(!bSameStream || R1.GetStartRow() <= R2.GetStartRow());
+      bool bOverlaps = bSameStream &&
+                       (R1.GetStartRow() <= R2.GetStartRow()  &&  R2.GetStartRow() <= R1.GetEndRow()) &&
+                      !(R1.GetEndCol() < R2.GetStartCol()  ||  R2.GetEndCol() < R1.GetStartCol());
+      if (bOverlaps) {
+        // Coalesce ranges.
+        R1.Rows = std::max(R1.Rows, R2.GetEndRow() - R1.GetStartRow() + 1);
+        unsigned StartCol = std::min(R1.GetStartCol(), R2.GetStartCol());
+        unsigned EndCol = std::max(R1.GetEndCol(), R2.GetEndCol());
+        R1.StartCol = StartCol;
+        R1.Cols = EndCol - R1.StartCol + 1;
+      } else {
+        iLastEntryIndex++;
+        SigHelper.m_Ranges[iLastEntryIndex] = SigHelper.m_Ranges[i];
+      }
+    }
+    SigHelper.m_Ranges.resize(iLastEntryIndex + 1);
+  }
+
+  // map range elements from SigHelper.m_ElementRecords to dxil signature element index
+  std::map<unsigned, unsigned> RangeElementToDxilElement;
+
+  for (size_t iElement = 0; iElement < SigHelper.m_ElementRecords.size(); iElement++) {
+    const SignatureHelper::ElementRecord &SigElem = SigHelper.m_ElementRecords[iElement];
+    const string &SemanticName  = SigElem.SemanticName;
+    unsigned SemanticIndex      = SigElem.SemanticIndex;
+    unsigned StartRow           = SigElem.StartRow;
+    unsigned StartCol           = SigElem.StartCol;
+    unsigned Rows               = SigElem.Rows; DXASSERT_NOMSG(Rows == 1);
+    unsigned Cols               = SigElem.Cols;
+    unsigned Stream             = SigElem.Stream;
+    CompType ComponentType      = SigElem.ComponentType;
+
+    // Determine interpolation mode by matching the corresponding decl record.
+    D3D_INTERPOLATION_MODE D3DInterpMode = D3D_INTERPOLATION_UNDEFINED;
+    if (m_pSM->IsPS() && SigHelper.IsInput()) {
+      bool bFirstUse = false;
+      if (!SigHelper.m_UsedElements.empty()) {
+        // Find used-element lower bound. The search key depends only on the
+        // element (stream, row, start column), not on the component being
+        // examined, so a single search serves every column.
+        SignatureHelper::UsedElement E1;
+        E1.Row = StartRow;
+        E1.StartCol = StartCol;
+        E1.OutputStream = Stream;
+        auto it = std::lower_bound(SigHelper.m_UsedElements.begin(), SigHelper.m_UsedElements.end(), E1, SignatureHelper::UsedElement::LTByStreamAndStartRowAndStartCol());
+
+        if (it != SigHelper.m_UsedElements.end()) {
+          SignatureHelper::UsedElement &E2 = *it;
+          for (unsigned i = 0; i < Cols; i++) {
+            unsigned c = StartCol + i;
+            if (E2.Row == E1.Row && (E2.StartCol <= c && c < E2.StartCol+E2.Cols)) {
+              if (!bFirstUse) {
+                // The first covered component decides the interpolation mode.
+                bFirstUse = true;
+                D3DInterpMode = E2.InterpolationMode;
+              } else {
+                // Every further covered component must agree with it.
+                DXASSERT_DXBC(D3DInterpMode == E2.InterpolationMode);
+              }
+            }
+          }
+        }
+      }
+    }
+
+    // Create a new signature element.
+    InterpolationMode::Kind IMK = DXBC::GetInterpolationModeKind(D3DInterpMode);
+    unique_ptr<DxilSignatureElement> pE(SigHelper.m_Signature.CreateElement());
+    pE->Initialize(SemanticName, ComponentType, InterpolationMode(IMK), Rows, Cols, StartRow, StartCol);
+    pE->SetOutputStream(Stream);
+    DxilSignatureElement &E = *pE;
+
+    // Check range containment.
+    bool bInRange = false;
+    if (!SigHelper.m_Ranges.empty()) {
+      // Search which range contains the element.
+      for (size_t iRange = 0; iRange < SigHelper.m_Ranges.size(); iRange++) {
+        SignatureHelper::Range &R = SigHelper.m_Ranges[iRange];
+
+        if (R.OutputStream != Stream)
+          continue;
+
+        if (R.StartRow <= StartRow  &&  StartRow < R.StartRow+R.Rows) {
+          if (!(StartCol+Cols-1 < R.GetStartCol() || R.GetEndCol() < StartCol)) {
+            // Found containment.
+            bInRange = true;
+            auto itKeyDxilEl = RangeElementToDxilElement.find(iElement);
+            if (itKeyDxilEl == RangeElementToDxilElement.end()) {
+              // First element in range
+              unsigned iDxilElementIndex = (unsigned)SigHelper.m_Signature.GetElements().size();
+              E.AppendSemanticIndex(SemanticIndex);
+
+              // Search for all matching elements by semantic in range to expand
+              // the range of this element:
+              for (size_t iOtherEl = iElement + 1;
+                   iOtherEl < SigHelper.m_ElementRecords.size() && StartRow + Rows < R.StartRow + R.Rows;
+                   iOtherEl++) {
+                // Skip elements that are part of another captured range already
+                if (RangeElementToDxilElement.find(iOtherEl) != RangeElementToDxilElement.end())
+                  continue;
+                const SignatureHelper::ElementRecord &OtherEl = SigHelper.m_ElementRecords[iOtherEl];
+                // There should be no gaps for indexed element, so we're done if we find one.
+                if (OtherEl.StartRow > StartRow + Rows)
+                  break;
+                if (SemanticName.compare(OtherEl.SemanticName) == 0) {
+                  // OtherEl should always have one row
+                  DXASSERT_DXBC(OtherEl.Rows == 1);
+                  // should always be adding one row at a time in order, and single
+                  // indexed element should not have different start column.
+                  if (OtherEl.StartRow == StartRow + Rows &&
+                      StartCol == OtherEl.StartCol) {
+                    RangeElementToDxilElement[iOtherEl] = iDxilElementIndex;
+                    Cols = std::max(Cols, OtherEl.Cols);
+                    Rows++;
+                    E.AppendSemanticIndex(OtherEl.SemanticIndex);
+                  }
+                }
+              }
+              // Adjust element dimensions to encompass matching elements.
+              E.SetStartCol(StartCol);
+              E.SetCols(Cols);
+              E.SetRows(Rows);
+              SigHelper.m_Signature.AppendElement(std::move(pE));
+            } else {
+              // This record was already folded into an earlier element; the
+              // element created for it above is not appended.
+#ifdef DBG
+              // Verify match with range representative element.
+              DxilSignatureElement &RE = SigHelper.m_Signature.GetElement(itKeyDxilEl->second);
+              DXASSERT_DXBC(RE.GetCompType() == E.GetCompType());
+              DXASSERT_DXBC(*RE.GetInterpolationMode() == *E.GetInterpolationMode());
+#endif
+            }
+
+            break;
+          } else {
+            // Check that there is no overlap.
+            DXASSERT_DXBC(StartCol+Cols <= R.StartCol  ||  StartCol >= R.StartCol+R.Cols);
+          }
+        }
+      }
+    }
+
+    if (!bInRange) {
+      // Stand-alone element: it carries exactly one semantic index.
+      DXASSERT(E.GetSemanticIndexVec().empty(), "otherwise a bug");
+      E.AppendSemanticIndex(SemanticIndex);
+
+      SigHelper.m_Signature.AppendElement(std::move(pE));
+    }
+  }
+
+  // Add SGVs that are not present in the signature blob.
+  if (SigHelper.m_bHasInputCoverage || SigHelper.m_bHasInnerInputCoverage) {
+    DXASSERT_DXBC(m_pSM->IsPS() && SigHelper.IsInput());
+    string SemName;
+    if (SigHelper.m_bHasInputCoverage) {
+      DXASSERT_DXBC(!SigHelper.m_bHasInnerInputCoverage);
+      SemName = string("SV_Coverage");
+    } else {
+      DXASSERT_DXBC(!SigHelper.m_bHasInputCoverage && SigHelper.m_bHasInnerInputCoverage);
+      SemName = string("SV_InnerCoverage");
+    }
+
+    // The coverage element is a single unallocated u32 component.
+    unique_ptr<DxilSignatureElement> E(SigHelper.m_Signature.CreateElement());
+    E->Initialize(SemName, CompType::Kind::U32, InterpolationMode(), 1, 1, Semantic::kUndefinedRow, 0);
+    E->AppendSemanticIndex(0);
+
+    SigHelper.m_Signature.AppendElement(std::move(E));
+  }
+
+  // Set up DXBC <reg,comp> to Element mapping or DXBC OperandRegType to Element mapping,
+  // depending on the semantic type.
+  for (size_t iElem = 0; iElem < SigHelper.m_Signature.GetElements().size(); iElem++) {
+    DxilSignatureElement &E = SigHelper.m_Signature.GetElement(iElem);
+
+    bool bUpdateRegMap = E.IsAllocated();
+
+    switch (E.GetKind()) {
+    case Semantic::Kind::Coverage:
+    case Semantic::Kind::InnerCoverage:
+    case Semantic::Kind::Depth:
+    case Semantic::Kind::DepthGreaterEqual:
+    case Semantic::Kind::DepthLessEqual:
+    case Semantic::Kind::StencilRef: {
+      // These kinds are looked up by operand register type, not by <reg,comp>.
+      bUpdateRegMap = false;
+      D3D10_SB_OPERAND_TYPE OperandRegType = DXBC::GetOperandRegType(E.GetKind(), /*IsOutput*/SigHelper.IsOutput());
+      DXASSERT_DXBC(SigHelper.m_DxbcSgvToSignatureElement.find(OperandRegType) == SigHelper.m_DxbcSgvToSignatureElement.end());
+      SigHelper.m_DxbcSgvToSignatureElement[OperandRegType] = (unsigned)iElem;
+      break;
+    }
+    }
+
+    if (bUpdateRegMap) {
+      DXASSERT_NOMSG(E.IsAllocated());
+      unsigned Stream = E.GetOutputStream();
+      // Register every <row, column, stream> cell covered by the element.
+      for (unsigned iRow = 0; iRow < E.GetRows(); iRow++) {
+        unsigned r = E.GetStartRow() + iRow;
+        for (unsigned iCol = 0; iCol < E.GetCols(); iCol++) {
+          unsigned c = E.GetStartCol() + iCol;
+          SignatureHelper::RegAndCompAndStream Key(r, c, Stream);
+          DXASSERT(SigHelper.m_DxbcRegisterToSignatureElement.find(Key) == SigHelper.m_DxbcRegisterToSignatureElement.end(), "otherwise elements are wrong");
+          SigHelper.m_DxbcRegisterToSignatureElement[Key] = (unsigned)iElem;
+        }
+      }
+    }
+  }
+
+  // Clone signature elements into DxilModule.
+  for (size_t i = 0; i < SigHelper.m_Signature.GetElements().size(); i++) {
+    DxilSignatureElement &E = SigHelper.m_Signature.GetElement(i);
+    DXIL::SemanticInterpretationKind I = E.GetInterpretation();
+    // Elements with these interpretations are not cloned.
+    switch (I) {
+    case DXIL::SemanticInterpretationKind::NA:
+    case DXIL::SemanticInterpretationKind::NotInSig:
+    case DXIL::SemanticInterpretationKind::Invalid:
+      continue;
+    }
+    unique_ptr<DxilSignatureElement> pClone(new DxilSignatureElement(E));
+    switch (I) {
+    case DXIL::SemanticInterpretationKind::NotPacked:
+    case DXIL::SemanticInterpretationKind::Shadow:
+      // Make sure element is unallocated in this case (DXBC allocates some of these)
+      pClone->SetStartRow(Semantic::kUndefinedRow);
+      pClone->SetStartCol(Semantic::kUndefinedCol);
+      break;
+    }
+    DxilSig.AppendElement(std::move(pClone));
+  }
+}
+
+/// Fills \p PSV with data read from \p pModule: the per-stage runtime info
+/// block and one resource-binding record per cbuffer, sampler, SRV and UAV
+/// (written in that order).
+///
+/// \param pModule  module whose shader model, signatures and resources are read.
+/// \param PSV      pipeline state validation object to populate; it is expected
+///                 to already provide a runtime info block and one binding slot
+///                 per resource (asserted below).
+static void AddDxilPipelineStateValidationToDXBC(
+  DxilModule *pModule,
+  DxilPipelineStateValidation &PSV)
+{
+  UINT NumCBuffers = pModule->GetCBuffers().size();
+  UINT NumSamplers = pModule->GetSamplers().size();
+  UINT NumSRVs = pModule->GetSRVs().size();
+  UINT NumUAVs = pModule->GetUAVs().size();
+  UINT NumResources = NumCBuffers + NumSamplers + NumSRVs + NumUAVs;
+
+  // Runtime info common to every shader stage.
+  PSVRuntimeInfo0 *pRuntimeInfo = PSV.GetPSVRuntimeInfo0();
+  const ShaderModel *pShaderModel = pModule->GetShaderModel();
+  pRuntimeInfo->MinimumExpectedWaveLaneCount = 0;
+  pRuntimeInfo->MaximumExpectedWaveLaneCount = -1;
+
+  // True when the output signature contains a Position element.
+  // The element's never-writes mask is not consulted; the original code notes
+  // that it is not yet part of the DXIL signature element.
+  auto OutputsPosition = [pModule]() -> bool {
+    for (auto &&Elem : pModule->GetOutputSignature().GetElements()) {
+      if (Elem->GetKind() == Semantic::Kind::Position)
+        return true;
+    }
+    return false;
+  };
+
+  // Stage-specific runtime info.
+  switch (pShaderModel->GetKind()) {
+  case ShaderModel::Kind::Vertex:
+    pRuntimeInfo->VS.OutputPositionPresent = OutputsPosition() ? 1 : 0;
+    break;
+
+  case ShaderModel::Kind::Hull:
+    pRuntimeInfo->HS.InputControlPointCount = (UINT)pModule->GetInputControlPointCount();
+    pRuntimeInfo->HS.OutputControlPointCount = (UINT)pModule->GetOutputControlPointCount();
+    pRuntimeInfo->HS.TessellatorDomain = (UINT)pModule->GetTessellatorDomain();
+    pRuntimeInfo->HS.TessellatorOutputPrimitive = (UINT)pModule->GetTessellatorOutputPrimitive();
+    break;
+
+  case ShaderModel::Kind::Domain:
+    pRuntimeInfo->DS.InputControlPointCount = (UINT)pModule->GetInputControlPointCount();
+    pRuntimeInfo->DS.OutputPositionPresent = OutputsPosition() ? 1 : 0;
+    pRuntimeInfo->DS.TessellatorDomain = (UINT)pModule->GetTessellatorDomain();
+    break;
+
+  case ShaderModel::Kind::Geometry:
+    pRuntimeInfo->GS.InputPrimitive = (UINT)pModule->GetInputPrimitive();
+    // NOTE(review): the original note says OutputTopology should come from a
+    // used stream, falling back to stream 0 with OutputStreamMask set to 1
+    // when no stream is used. The values are copied from the module as-is
+    // here; confirm whether the module getters already apply that fallback.
+    pRuntimeInfo->GS.OutputTopology = (UINT)pModule->GetStreamPrimitiveTopology();
+    pRuntimeInfo->GS.OutputStreamMask = pModule->GetActiveStreamMask();
+    pRuntimeInfo->GS.OutputPositionPresent = OutputsPosition() ? 1 : 0;
+    break;
+
+  case ShaderModel::Kind::Pixel: {
+    // Sample frequency: any input interpolated per sample, or a SampleIndex input.
+    bool bSampleFrequency = false;
+    for (auto &&Elem : pModule->GetInputSignature().GetElements()) {
+      if (Elem->GetInterpolationMode()->IsAnySample() ||
+          Elem->GetKind() == Semantic::Kind::SampleIndex) {
+        bSampleFrequency = true;
+        break;
+      }
+    }
+    // Depth output: any depth-kind element in the output signature.
+    bool bDepthOutput = false;
+    for (auto &&Elem : pModule->GetOutputSignature().GetElements()) {
+      if (Elem->IsAnyDepth()) {
+        bDepthOutput = true;
+        break;
+      }
+    }
+    pRuntimeInfo->PS.DepthOutput = bDepthOutput ? 1 : 0;
+    pRuntimeInfo->PS.SampleFrequency = bSampleFrequency ? 1 : 0;
+    break;
+  }
+
+  default:
+    // Other shader kinds get no stage-specific info.
+    break;
+  }
+
+  // Resource binding table. Slots are filled sequentially.
+  UINT NextSlot = 0;
+  auto RecordBinding = [&](UINT ResType, UINT Space, UINT LowerBound, UINT UpperBound) {
+    DXASSERT_NOMSG(NextSlot < NumResources);
+    PSVResourceBindInfo0 *pSlot = PSV.GetPSVResourceBindInfo0(NextSlot);
+    DXASSERT_NOMSG(pSlot);
+    pSlot->ResType = ResType;
+    pSlot->Space = Space;
+    pSlot->LowerBound = LowerBound;
+    pSlot->UpperBound = UpperBound;
+    NextSlot++;
+  };
+
+  for (auto &&CB : pModule->GetCBuffers()) {
+    RecordBinding((UINT)PSVResourceType::CBV,
+                  CB->GetSpaceID(), CB->GetLowerBound(), CB->GetUpperBound());
+  }
+
+  for (auto &&Smp : pModule->GetSamplers()) {
+    RecordBinding((UINT)PSVResourceType::Sampler,
+                  Smp->GetSpaceID(), Smp->GetLowerBound(), Smp->GetUpperBound());
+  }
+
+  for (auto &&SRV : pModule->GetSRVs()) {
+    // Structured takes precedence over raw; everything else is typed.
+    UINT ResType = (UINT)PSVResourceType::SRVTyped;
+    if (SRV->IsStructuredBuffer())
+      ResType = (UINT)PSVResourceType::SRVStructured;
+    else if (SRV->IsRawBuffer())
+      ResType = (UINT)PSVResourceType::SRVRaw;
+    RecordBinding(ResType,
+                  SRV->GetSpaceID(), SRV->GetLowerBound(), SRV->GetUpperBound());
+  }
+
+  for (auto &&UAV : pModule->GetUAVs()) {
+    // Same precedence as SRVs; structured UAVs are split by counter presence.
+    UINT ResType = (UINT)PSVResourceType::UAVTyped;
+    if (UAV->IsStructuredBuffer())
+      ResType = UAV->HasCounter() ? (UINT)PSVResourceType::UAVStructuredWithCounter
+                                  : (UINT)PSVResourceType::UAVStructured;
+    else if (UAV->IsRawBuffer())
+      ResType = (UINT)PSVResourceType::UAVRaw;
+    RecordBinding(ResType,
+                  UAV->GetSpaceID(), UAV->GetLowerBound(), UAV->GetUpperBound());
+  }
+
+  // Every slot counted up front must have been written exactly once.
+  DXASSERT_NOMSG(NextSlot == NumResources);
+}
+
+void DxbcConverter::AnalyzeShader(D3D10ShaderBinary::CShaderCodeParser &Parser) {
+  // Parse shader model.
+  D3D10_SB_TOKENIZED_PROGRAM_TYPE ShaderType = Parser.ShaderType();
+  m_DxbcMajor = Parser.ShaderMajorVersion();
+  m_DxbcMinor = Parser.ShaderMinorVersion();
+  ShaderModel::Kind ShaderKind = DXBC::GetShaderModelKind(ShaderType);
+  // The converter always promotes the shader version to 6.0.
+  m_pSM = ShaderModel::Get(ShaderKind, 6, 0);
+  m_pPR->SetShaderModel(m_pSM);
+
+  // By default refactoring is disallowed, unless we encounter
+  // dcl_globalflags allowRefactoring
+  m_pPR->m_ShaderFlags.SetDisableMathRefactoring(true);
+  // By default, all resources are assumed bound for SM5.0 shaders,
+  // unless we encounter interface declarations
+  m_pPR->m_ShaderFlags.SetAllResourcesBound(true);
+
+  // Setup signature helpers.
+  m_pInputSignature.reset(new SignatureHelper(m_pSM->GetKind(), DXIL::SignatureKind::Input));
+  m_pOutputSignature.reset(new SignatureHelper(m_pSM->GetKind(), DXIL::SignatureKind::Output));
+  m_pPatchConstantSignature.reset(new SignatureHelper(m_pSM->GetKind(), DXIL::SignatureKind::PatchConstOrPrim));
+
+  // Collect:
+  //   1. Declarations
+  //   2. Labels
+  // Declare:
+  //   1. Global symbols for resources/samplers.
+  //   2. Their types.
+  BYTE CurrentOutputStream = 0;
+  unsigned MaxOutputRegister = 0;
+  m_bControlPointPhase = false;
+  bool bPatchConstantPhase = false;
+  D3D10ShaderBinary::CInstruction Inst;
+  while(!Parser.EndOfShader()) {
+    Parser.ParseInstruction(&Inst);
+
+    switch (Inst.OpCode()) {
+    case D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER: {
+      // Record this cbuffer declaration in DxilModule.
+      unsigned ID = m_pPR->AddCBuffer(unique_ptr<DxilCBuffer>(new DxilCBuffer));
+      DxilCBuffer &R = m_pPR->GetCBuffer(ID);  // R == record
+      R.SetID(ID);
+      // Root signature bindings.
+      unsigned RangeID = Inst.m_Operands[0].m_Index[0].m_RegIndex;
+      unsigned CBufferSize = Inst.m_ConstantBufferDecl.Size * DXBC::kWidth * 4;
+      unsigned LB, RangeSize;
+      switch (Inst.m_Operands[0].m_IndexDimension) {
+      case D3D10_SB_OPERAND_INDEX_2D: // SM 5.0-
+        LB = RangeID;
+        RangeSize = 1;
+        break;
+      case D3D10_SB_OPERAND_INDEX_3D: // SM 5.1
+        LB = Inst.m_Operands[0].m_Index[1].m_RegIndex;
+        RangeSize = Inst.m_Operands[0].m_Index[2].m_RegIndex != UINT_MAX ? Inst.m_Operands[0].m_Index[2].m_RegIndex - LB + 1 : UINT_MAX;
+        break;
+      default:
+        DXASSERT_DXBC(false);
+        IFTARG(NULL);
+      }
+      R.SetLowerBound(LB);
+      R.SetRangeSize(RangeSize);
+      R.SetSpaceID(Inst.m_ConstantBufferDecl.Space);
+      // Declare global variable.
+      R.SetGlobalName(SynthesizeResGVName("CB", R.GetID()));
+      StructType *pResType = GetStructResElemType(CBufferSize);
+      R.SetGlobalSymbol(DeclareUndefPtr(pResType, DXIL::kCBufferAddrSpace));
+      R.SetHandle(nullptr);
+
+      // CBuffer-specific state.
+      R.SetSize(CBufferSize);
+      //R.SetImmediateIndexed(Inst.m_ConstantBufferDecl.AccessPattern == D3D10_SB_CONSTANT_BUFFER_IMMEDIATE_INDEXED);
+
+      // Record shader register/rangeID mapping for upcoming instruction conversion.
+      DXASSERT(m_CBufferRangeMap.find(RangeID) == m_CBufferRangeMap.end(), "otherwise overlapping declarations");
+      m_CBufferRangeMap[RangeID] = R.GetID();
+      break;
+    }
+
+    case D3D10_SB_OPCODE_DCL_SAMPLER: {
+      // Record this sampler declaration in DxilModule.
+      unsigned ID = m_pPR->AddSampler(unique_ptr<DxilSampler>(new DxilSampler));
+      DxilSampler &R = m_pPR->GetSampler(ID);  // R == record
+      R.SetID(ID);
+      // Root signature bindings.
+      unsigned RangeID = Inst.m_Operands[0].m_Index[0].m_RegIndex;
+      unsigned LB, RangeSize;
+      switch (Inst.m_Operands[0].m_IndexDimension) {
+      case D3D10_SB_OPERAND_INDEX_1D: // SM 5.0-
+        LB = RangeID;
+        RangeSize = 1;
+        break;
+      case D3D10_SB_OPERAND_INDEX_3D: // SM 5.1
+        LB = Inst.m_Operands[0].m_Index[1].m_RegIndex;
+        RangeSize = Inst.m_Operands[0].m_Index[2].m_RegIndex != UINT_MAX ? Inst.m_Operands[0].m_Index[2].m_RegIndex - LB + 1 : UINT_MAX;
+        break;
+      default:
+        DXASSERT_DXBC(false);
+        IFTARG(NULL);
+      }
+      R.SetLowerBound(LB);
+      R.SetRangeSize(RangeSize);
+      R.SetSpaceID(Inst.m_SamplerDecl.Space);
+      // Declare global variable.
+      R.SetGlobalName(SynthesizeResGVName("S", R.GetID()));
+      string ResTypeName("dx.types.Sampler");
+      StructType *pResType = m_pModule->getTypeByName(ResTypeName);
+      if (pResType == nullptr) {
+        pResType = StructType::create(m_Ctx, ResTypeName);
+      }
+      R.SetGlobalSymbol(DeclareUndefPtr(pResType, DXIL::kDeviceMemoryAddrSpace));
+      R.SetHandle(nullptr);
+
+      // Sampler-specific state.
+      R.SetSamplerKind(DXBC::GetSamplerKind(Inst.m_SamplerDecl.SamplerMode));
+
+      // Record shader register/rangeID mapping for upcoming instruction conversion.
+      DXASSERT(m_SamplerRangeMap.find(RangeID) == m_SamplerRangeMap.end(), "otherwise overlapping declarations");
+      m_SamplerRangeMap[RangeID] = R.GetID();
+      break;
+    }
+
+    case D3D10_SB_OPCODE_DCL_RESOURCE:
+    case D3D11_SB_OPCODE_DCL_RESOURCE_RAW:
+    case D3D11_SB_OPCODE_DCL_RESOURCE_STRUCTURED: {
+      // Record this SRV declaration in DxilModule.
+      unsigned ID = m_pPR->AddSRV(unique_ptr<DxilResource>(new DxilResource));
+      DxilResource &R = m_pPR->GetSRV(ID);  // R == record
+      R.SetID(ID);
+      R.SetRW(false);
+      // Root signature bindings.
+      unsigned RangeID = Inst.m_Operands[0].m_Index[0].m_RegIndex;
+      unsigned LB, RangeSize;
+      if (IsSM51Plus()) {
+        LB = Inst.m_Operands[0].m_Index[1].m_RegIndex;
+        RangeSize = Inst.m_Operands[0].m_Index[2].m_RegIndex != UINT_MAX ? Inst.m_Operands[0].m_Index[2].m_RegIndex - LB + 1 : UINT_MAX;
+      } else {
+        LB = RangeID;
+        RangeSize = 1;
+      }
+      R.SetLowerBound(LB);
+      R.SetRangeSize(RangeSize);
+      R.SetHandle(nullptr);
+
+      // Resource-specific state.
+      StructType *pResType = nullptr;
+      switch (Inst.OpCode()) {
+      case D3D10_SB_OPCODE_DCL_RESOURCE: {
+        R.SetSpaceID(Inst.m_ResourceDecl.Space);
+        R.SetKind(DXBC::GetResourceKind(Inst.m_ResourceDecl.Dimension));
+        const unsigned kTypedBufferElementSizeInBytes = 4;
+        R.SetElementStride(kTypedBufferElementSizeInBytes);
+        R.SetSampleCount(Inst.m_ResourceDecl.SampleCount);
+        CompType DeclCT = DXBC::GetDeclResCompType(Inst.m_ResourceDecl.ReturnType[0]);
+        if (DeclCT.IsInvalid()) DeclCT = CompType::getU32();
+        R.SetCompType(DeclCT);
+        pResType = GetTypedResElemType(DeclCT);
+        break;
+      }
+      case D3D11_SB_OPCODE_DCL_RESOURCE_RAW: {
+        R.SetSpaceID(Inst.m_RawSRVDecl.Space);
+        R.SetKind(DxilResource::Kind::RawBuffer);
+        const unsigned kRawBufferElementSizeInBytes = 1;
+        R.SetElementStride(kRawBufferElementSizeInBytes);
+        pResType = GetTypedResElemType(CompType::getU32());
+        break;
+      }
+      case D3D11_SB_OPCODE_DCL_RESOURCE_STRUCTURED: {
+        R.SetSpaceID(Inst.m_StructuredSRVDecl.Space);
+        R.SetKind(DxilResource::Kind::StructuredBuffer);
+        unsigned Stride = Inst.m_StructuredSRVDecl.ByteStride;
+        R.SetElementStride(Stride);
+        pResType = GetStructResElemType(Stride);
+        break;
+      }
+      default: ;
+      }
+
+      // Declare global variable.
+      R.SetGlobalName(SynthesizeResGVName("T", R.GetID()));
+      R.SetGlobalSymbol(DeclareUndefPtr(pResType, DXIL::kDeviceMemoryAddrSpace));
+
+      // Record shader register/rangeID mapping for upcoming instruction conversion.
+      DXASSERT(m_SRVRangeMap.find(RangeID) == m_SRVRangeMap.end(), "otherwise overlapping declarations");
+      m_SRVRangeMap[RangeID] = R.GetID();
+      break;
+    }
+
+    case D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED:
+    case D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW:
+    case D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: {
+      // Record this UAV declaration in DxilModule.
+      unsigned ID = m_pPR->AddUAV(unique_ptr<DxilResource>(new DxilResource));
+      DxilResource &R = m_pPR->GetUAV(ID);  // R == record
+      R.SetID(ID);
+      R.SetRW(true);
+      // Root signature bindings.
+      unsigned RangeID = Inst.m_Operands[0].m_Index[0].m_RegIndex;
+      unsigned LB, RangeSize;
+      if (IsSM51Plus()) {
+        LB = Inst.m_Operands[0].m_Index[1].m_RegIndex;
+        RangeSize = Inst.m_Operands[0].m_Index[2].m_RegIndex != UINT_MAX ? Inst.m_Operands[0].m_Index[2].m_RegIndex - LB + 1 : UINT_MAX;
+      } else {
+        LB = RangeID;
+        RangeSize = 1;
+      }
+      R.SetLowerBound(LB);
+      R.SetRangeSize(RangeSize);
+      R.SetHandle(nullptr);
+
+      // Resource-specific state.
+      string GVTypeName;
+      raw_string_ostream GVTypeNameStream(GVTypeName);
+      StructType *pResType = nullptr;
+      unsigned Flags = 0;
+      switch (Inst.OpCode()) {
+      case D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED: {
+        R.SetSpaceID(Inst.m_TypedUAVDecl.Space);
+        Flags = Inst.m_TypedUAVDecl.Flags;
+        R.SetKind(DXBC::GetResourceKind(Inst.m_TypedUAVDecl.Dimension));
+        const unsigned kTypedBufferElementSizeInBytes = 4;
+        R.SetElementStride(kTypedBufferElementSizeInBytes);
+        CompType DeclCT = DXBC::GetDeclResCompType(Inst.m_TypedUAVDecl.ReturnType[0]);
+        if (DeclCT.IsInvalid()) DeclCT = CompType::getU32();
+        R.SetCompType(DeclCT);
+        pResType = GetTypedResElemType(DeclCT);
+        break;
+      }
+      case D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW: {
+        R.SetSpaceID(Inst.m_RawUAVDecl.Space);
+        R.SetKind(DxilResource::Kind::RawBuffer);
+        Flags = Inst.m_RawUAVDecl.Flags;
+        const unsigned kRawBufferElementSizeInBytes = 1;
+        R.SetElementStride(kRawBufferElementSizeInBytes);
+        pResType = GetTypedResElemType(CompType::getU32());
+        break;
+      }
+      case D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: {
+        R.SetSpaceID(Inst.m_StructuredUAVDecl.Space);
+        R.SetKind(DxilResource::Kind::StructuredBuffer);
+        Flags = Inst.m_StructuredUAVDecl.Flags;
+        unsigned Stride = Inst.m_StructuredUAVDecl.ByteStride;
+        R.SetElementStride(Stride);
+        pResType = GetStructResElemType(Stride);
+        break;
+      }
+      default: ;
+      }
+
+      R.SetGloballyCoherent((Flags & D3D11_SB_GLOBALLY_COHERENT_ACCESS) != 0);
+      R.SetHasCounter((Flags & D3D11_SB_UAV_HAS_ORDER_PRESERVING_COUNTER) != 0);
+      R.SetROV((Flags & D3D11_SB_RASTERIZER_ORDERED_ACCESS) != 0);
+
+      // Declare global variable.
+      R.SetGlobalName(SynthesizeResGVName("U", R.GetID()));
+      R.SetGlobalSymbol(DeclareUndefPtr(pResType, DXIL::kDeviceMemoryAddrSpace));
+
+      // Record shader register/rangeID mapping for upcoming instruction conversion.
+      DXASSERT(m_UAVRangeMap.find(RangeID) == m_UAVRangeMap.end(), "otherwise overlapping declarations");
+      m_UAVRangeMap[RangeID] = R.GetID();
+      break;
+    }
+    
+    case D3D10_SB_OPCODE_DCL_INDEX_RANGE: {
+      unsigned RowRegIdx = (Inst.m_Operands[0].m_IndexDimension == D3D10_SB_OPERAND_INDEX_1D) ? 0 : 1;
+      SignatureHelper::Range R;
+      R.StartRow      = Inst.m_Operands[0].m_Index[RowRegIdx].m_RegIndex;
+      R.StartCol      = CMask::FromDXBC(Inst.m_Operands[0].m_WriteMask).GetFirstActiveComp();
+      R.Rows          = Inst.m_IndexRangeDecl.RegCount;
+      R.Cols          = CMask::FromDXBC(Inst.m_Operands[0].m_WriteMask).GetNumActiveRangeComps();
+      R.OutputStream  = CurrentOutputStream;
+
+      switch (Inst.m_Operands[0].m_Type) {
+      case D3D10_SB_OPERAND_TYPE_INPUT:
+        m_pInputSignature->m_Ranges.emplace_back(R);
+        break;
+      case D3D10_SB_OPERAND_TYPE_OUTPUT:
+        if (!m_pSM->IsHS() || m_bControlPointPhase) {
+          m_pOutputSignature->m_Ranges.emplace_back(R);
+        } else {
+          DXASSERT_NOMSG(m_pSM->IsHS() && bPatchConstantPhase);
+          m_pPatchConstantSignature->m_Ranges.emplace_back(R);
+        }
+        break;
+      case D3D11_SB_OPERAND_TYPE_INPUT_PATCH_CONSTANT:
+        DXASSERT_DXBC(m_pSM->IsHS() || m_pSM->IsDS());
+        m_pPatchConstantSignature->m_Ranges.emplace_back(R);
+        break;
+      case D3D11_SB_OPERAND_TYPE_INPUT_CONTROL_POINT:
+        DXASSERT_DXBC(m_pSM->IsHS() || m_pSM->IsDS());
+        m_pInputSignature->m_Ranges.emplace_back(R);
+        break;
+      default:
+        DXASSERT_DXBC(false);
+      }
+      break;
+    }
+
+    case D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE:
+      m_pPR->SetInputPrimitive(DXBC::GetInputPrimitive(Inst.m_InputPrimitiveDecl.Primitive));
+      break;
+
+    case D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY:
+      m_pPR->SetStreamPrimitiveTopology(DXBC::GetPrimitiveTopology(Inst.m_OutputTopologyDecl.Topology));
+      break;
+
+    case D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
+      m_pPR->SetMaxVertexCount(Inst.m_GSMaxOutputVertexCountDecl.MaxOutputVertexCount);
+      break;
+
+    case D3D10_SB_OPCODE_DCL_INPUT: {
+      // Record a declared input register. Depending on the operand type this
+      // either sets a coverage flag on the input signature, is ignored, or
+      // appends a UsedElement to the input / patch-constant signature.
+      D3D10_SB_OPERAND_TYPE RegType = Inst.m_Operands[0].m_Type;
+      switch (RegType) {
+      case D3D11_SB_OPERAND_TYPE_INPUT_COVERAGE_MASK:
+        m_pInputSignature->m_bHasInputCoverage = true;
+        break;
+
+      case D3D11_SB_OPERAND_TYPE_INNER_COVERAGE:
+        m_pInputSignature->m_bHasInnerInputCoverage = true;
+        break;
+
+      // These operand types do not add a used element here.
+      case D3D11_SB_OPERAND_TYPE_INPUT_THREAD_ID:
+      case D3D11_SB_OPERAND_TYPE_INPUT_THREAD_GROUP_ID:
+      case D3D11_SB_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP:
+      case D3D11_SB_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED:
+      case D3D11_SB_OPERAND_TYPE_INPUT_DOMAIN_POINT:
+      case D3D11_SB_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID:
+      case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID:
+      case D3D11_SB_OPERAND_TYPE_INPUT_FORK_INSTANCE_ID:
+      case D3D11_SB_OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID:
+      case D3D11_SB_OPERAND_TYPE_CYCLE_COUNTER:
+      case D3D11_SB_OPERAND_TYPE_INPUT_GS_INSTANCE_ID:
+        break;
+
+      default: {
+        // Zero-initialize: the unexpected-dimension path below only asserts,
+        // so without initializers NumUnits/Row would be read indeterminate
+        // whenever DXASSERT does not stop execution.
+        unsigned NumUnits = 0, Row = 0;
+        switch (Inst.m_Operands[0].m_IndexDimension) {
+        case D3D10_SB_OPERAND_INDEX_1D:
+          // Single index: it is the register row.
+          NumUnits = 0;
+          Row = Inst.m_Operands[0].m_Index[0].m_RegIndex;
+          break;
+
+        case D3D10_SB_OPERAND_INDEX_2D:
+          // Two indices: the first is the unit count, the second the row.
+          NumUnits = Inst.m_Operands[0].m_Index[0].m_RegIndex;
+          Row = Inst.m_Operands[0].m_Index[1].m_RegIndex;
+          break;
+
+        default:
+          DXASSERT(false, "there should no other index dimensions");
+        }
+
+        SignatureHelper::UsedElement E;
+        E.NumUnits          = NumUnits;
+        E.Row               = Row;
+        E.StartCol          = CMask::FromDXBC(Inst.m_Operands[0].m_WriteMask).GetFirstActiveComp();
+        E.Cols              = CMask::FromDXBC(Inst.m_Operands[0].m_WriteMask).GetNumActiveRangeComps();
+        E.InterpolationMode = D3D_INTERPOLATION_UNDEFINED;
+        E.MinPrecision      = Inst.m_Operands[0].m_MinPrecision;
+
+        if (RegType == D3D10_SB_OPERAND_TYPE_INPUT) {
+          m_pInputSignature->m_UsedElements.emplace_back(E);
+        } else {
+          // Non-plain input operands are only routed for DS and HS.
+          if (m_pSM->IsDS()) {
+            switch (RegType) {
+            case D3D11_SB_OPERAND_TYPE_INPUT_CONTROL_POINT:
+              m_pInputSignature->m_UsedElements.emplace_back(E);
+              break;
+            case D3D11_SB_OPERAND_TYPE_INPUT_PATCH_CONSTANT:
+              m_pPatchConstantSignature->m_UsedElements.emplace_back(E);
+              break;
+            default:
+              DXASSERT(false, "check unsupported case");
+              break;
+            }
+          }
+
+          if (m_pSM->IsHS()) {
+            switch (RegType) {
+            case D3D11_SB_OPERAND_TYPE_INPUT_CONTROL_POINT:
+              m_pInputSignature->m_UsedElements.emplace_back(E);
+              break;
+            // For HS these two operand types are accepted but not recorded.
+            case D3D11_SB_OPERAND_TYPE_INPUT_PATCH_CONSTANT:
+              break;
+            case D3D11_SB_OPERAND_TYPE_OUTPUT_CONTROL_POINT:
+              break;
+            default:
+              DXASSERT(false, "check unsupported case");
+              break;
+            }
+          }
+        }
+
+        break;
+      }
+      }
+
+      break;
+    }
+
+    case D3D10_SB_OPCODE_DCL_INPUT_SGV: {
+      SignatureHelper::UsedElement E;
+      E.Row               = Inst.m_Operands[0].m_Index[0].m_RegIndex;
+      E.StartCol          = CMask::FromDXBC(Inst.m_Operands[0].m_WriteMask).GetFirstActiveComp();
+      E.Cols              = CMask::FromDXBC(Inst.m_Operands[0].m_WriteMask).GetNumActiveRangeComps();
+      E.InterpolationMode = D3D_INTERPOLATION_UNDEFINED;
+      E.MinPrecision      = Inst.m_Operands[0].m_MinPrecision;
+
+      m_pInputSignature->m_UsedElements.emplace_back(E);
+      break;
+    }
+
+    case D3D10_SB_OPCODE_DCL_INPUT_SIV: {
+      // System-interpreted-value input: build a UsedElement from operand 0
+      // and append it to the signature selected by the operand type.
+      auto &Operand = Inst.m_Operands[0];
+
+      // For geometry shaders the first index is taken as the unit count and
+      // the second as the row; otherwise the first index is the row.
+      unsigned NumUnits, Row;
+      if (m_pSM->IsGS()) {
+        NumUnits = Operand.m_Index[0].m_RegIndex;
+        Row = Operand.m_Index[1].m_RegIndex;
+      } else {
+        NumUnits = 0;
+        Row = Operand.m_Index[0].m_RegIndex;
+      }
+
+      CMask Mask = CMask::FromDXBC(Operand.m_WriteMask);
+
+      SignatureHelper::UsedElement E;
+      E.NumUnits          = NumUnits;
+      E.Row               = Row;
+      E.StartCol          = Mask.GetFirstActiveComp();
+      E.Cols              = Mask.GetNumActiveRangeComps();
+      E.InterpolationMode = D3D_INTERPOLATION_UNDEFINED;
+      E.MinPrecision      = Operand.m_MinPrecision;
+
+      if (Operand.m_Type == D3D10_SB_OPERAND_TYPE_INPUT) {
+        m_pInputSignature->m_UsedElements.emplace_back(E);
+      } else if (Operand.m_Type == D3D11_SB_OPERAND_TYPE_INPUT_PATCH_CONSTANT) {
+        m_pPatchConstantSignature->m_UsedElements.emplace_back(E);
+      } else {
+        DXASSERT(false, "missing case");
+      }
+
+      break;
+    }
+
+    case D3D10_SB_OPCODE_DCL_INPUT_PS: {
+      SignatureHelper::UsedElement E;
+      E.Row               = Inst.m_Operands[0].m_Index[0].m_RegIndex;
+      E.StartCol          = CMask::FromDXBC(Inst.m_Operands[0].m_WriteMask).GetFirstActiveComp();
+      E.Cols              = CMask::FromDXBC(Inst.m_Operands[0].m_WriteMask).GetNumActiveRangeComps();
+      E.InterpolationMode = (D3D_INTERPOLATION_MODE)Inst.m_InputPSDecl.InterpolationMode;
+      E.MinPrecision      = Inst.m_Operands[0].m_MinPrecision;
+
+      m_pInputSignature->m_UsedElements.emplace_back(E);
+      break;
+    }
+
+    case D3D10_SB_OPCODE_DCL_INPUT_PS_SGV: {
+      SignatureHelper::UsedElement E;
+      E.Row               = Inst.m_Operands[0].m_Index[0].m_RegIndex;
+      E.StartCol          = CMask::FromDXBC(Inst.m_Operands[0].m_WriteMask).GetFirstActiveComp();
+      E.Cols              = CMask::FromDXBC(Inst.m_Operands[0].m_WriteMask).GetNumActiveRangeComps();
+      E.InterpolationMode = (D3D_INTERPOLATION_MODE)Inst.m_InputPSDeclSGV.InterpolationMode;
+      E.MinPrecision      = Inst.m_Operands[0].m_MinPrecision;
+
+      m_pInputSignature->m_UsedElements.emplace_back(E);
+      break;
+    }
+
+    case D3D10_SB_OPCODE_DCL_INPUT_PS_SIV: {
+      SignatureHelper::UsedElement E;
+      E.Row               = Inst.m_Operands[0].m_Index[0].m_RegIndex;
+      E.StartCol          = CMask::FromDXBC(Inst.m_Operands[0].m_WriteMask).GetFirstActiveComp();
+      E.Cols              = CMask::FromDXBC(Inst.m_Operands[0].m_WriteMask).GetNumActiveRangeComps();
+      E.InterpolationMode = (D3D_INTERPOLATION_MODE)Inst.m_InputPSDeclSIV.InterpolationMode;
+      E.MinPrecision      = Inst.m_Operands[0].m_MinPrecision;
+
+      m_pInputSignature->m_UsedElements.emplace_back(E);
+      break;
+    }
+
+    case D3D10_SB_OPCODE_DCL_OUTPUT: {
+      // Record a declared output register as a used element of the output
+      // (or, in the HS patch-constant phase, patch-constant) signature.
+      D3D10_SB_OPERAND_TYPE RegType = Inst.m_Operands[0].m_Type;
+      switch (RegType) {
+      case D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH:
+      case D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL:
+      case D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL:
+        // Remember which depth output flavor was declared.
+        m_DepthRegType = RegType;
+        __fallthrough;
+      case D3D11_SB_OPERAND_TYPE_OUTPUT_STENCIL_REF:
+      case D3D10_SB_OPERAND_TYPE_OUTPUT_COVERAGE_MASK: {
+        // Only ever set these flags, never clear them: this path is shared by
+        // the depth, stencil-ref and coverage declarations, so a plain
+        // assignment would let a later declaration reset a flag that an
+        // earlier one had set.
+        // NOTE(review): assumes both flags start out false -- confirm.
+        if (RegType == D3D11_SB_OPERAND_TYPE_OUTPUT_STENCIL_REF)
+          m_bHasStencilRef = true;
+        if (RegType == D3D10_SB_OPERAND_TYPE_OUTPUT_COVERAGE_MASK)
+          m_bHasCoverageOut = true;
+
+        // These outputs have no register row; they occupy one component.
+        SignatureHelper::UsedElement E;
+        E.Row               = Semantic::kUndefinedRow;
+        E.StartCol          = 0;
+        E.Cols              = 1;
+        E.InterpolationMode = D3D_INTERPOLATION_UNDEFINED;
+        E.MinPrecision      = Inst.m_Operands[0].m_MinPrecision;
+
+        m_pOutputSignature->m_UsedElements.emplace_back(E);
+        break;
+      }
+
+      default: {
+        SignatureHelper::UsedElement E;
+        E.Row               = Inst.m_Operands[0].m_Index[0].m_RegIndex;
+        E.StartCol          = CMask::FromDXBC(Inst.m_Operands[0].m_WriteMask).GetFirstActiveComp();
+        E.Cols              = CMask::FromDXBC(Inst.m_Operands[0].m_WriteMask).GetNumActiveRangeComps();
+        E.InterpolationMode = D3D_INTERPOLATION_UNDEFINED;
+        E.MinPrecision      = Inst.m_Operands[0].m_MinPrecision;
+        E.OutputStream      = CurrentOutputStream;
+
+        // Outside a hull shader, or in the HS control-point phase, outputs
+        // belong to the output signature; otherwise to the patch constants.
+        if (!m_pSM->IsHS() || m_bControlPointPhase) {
+          m_pOutputSignature->m_UsedElements.emplace_back(E);
+        } else {
+          DXASSERT_NOMSG(m_pSM->IsHS() && bPatchConstantPhase);
+          m_pPatchConstantSignature->m_UsedElements.emplace_back(E);
+        }
+
+        // Track the highest declared output register row.
+        MaxOutputRegister = std::max(MaxOutputRegister, E.Row);
+        break;
+      }
+      }
+
+      break;
+    }
+
+    case D3D10_SB_OPCODE_DCL_OUTPUT_SGV:
+    case D3D10_SB_OPCODE_DCL_OUTPUT_SIV: {
+      SignatureHelper::UsedElement E;
+      E.Row               = Inst.m_Operands[0].m_Index[0].m_RegIndex;
+      E.StartCol          = CMask::FromDXBC(Inst.m_Operands[0].m_WriteMask).GetFirstActiveComp();
+      E.Cols              = CMask::FromDXBC(Inst.m_Operands[0].m_WriteMask).GetNumActiveRangeComps();
+      E.InterpolationMode = D3D_INTERPOLATION_UNDEFINED;
+      E.MinPrecision      = Inst.m_Operands[0].m_MinPrecision;
+      E.OutputStream      = CurrentOutputStream;
+
+      if (!m_pSM->IsHS() || m_bControlPointPhase) {
+        m_pOutputSignature->m_UsedElements.emplace_back(E);
+      } else {
+        DXASSERT_NOMSG(m_pSM->IsHS() && bPatchConstantPhase);
+        m_pPatchConstantSignature->m_UsedElements.emplace_back(E);
+      }
+
+      MaxOutputRegister = std::max(MaxOutputRegister, E.Row);
+      break;
+    }
+    
+    case D3D10_SB_OPCODE_DCL_TEMPS:
+      m_NumTempRegs = std::max(m_NumTempRegs, Inst.m_TempsDecl.NumTemps);
+      break;
+
+    case D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP: {
+      // Record an indexable temp (x-register) declaration: its register
+      // count and the number of components covered by its mask.
+      unsigned Reg = Inst.m_IndexableTempDecl.IndexableTempNumber;
+      unsigned NumRegs = Inst.m_IndexableTempDecl.NumRegisters;
+      CMask Mask = CMask::FromDXBC(Inst.m_IndexableTempDecl.Mask);
+      IndexableReg IR = { nullptr, nullptr, NumRegs, Mask.GetNumActiveRangeComps(), true };
+
+      if (!bPatchConstantPhase) {
+        // This is the main shader: each x-register may be declared once.
+        DXASSERT_DXBC(m_IndexableRegs.find(Reg) == m_IndexableRegs.end());
+        m_IndexableRegs[Reg] = IR;
+      } else {
+        // This is patch constant function.
+        // Can have dcl per phase, so merge repeated declarations by keeping
+        // the largest component count and the largest register count.
+        auto itIR = m_PatchConstantIndexableRegs.find(Reg);
+        if (itIR != m_PatchConstantIndexableRegs.end()) {
+          auto &theIR = itIR->second;
+          theIR.NumComps = std::max(theIR.NumComps, IR.NumComps);
+          // Grow NumRegs here; writing this maximum into NumComps would
+          // clobber the component count and leave NumRegs stale.
+          theIR.NumRegs = std::max(theIR.NumRegs, IR.NumRegs);
+        } else {
+          m_PatchConstantIndexableRegs[Reg] = IR;
+        }
+      }
+      break;
+    }
+
+    case D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS:
+      SetShaderGlobalFlags(Inst.m_GlobalFlagsDecl.Flags);
+      break;
+
+    case D3D11_SB_OPCODE_DCL_STREAM: {
+      BYTE Stream = (BYTE)Inst.m_Operands[0].m_Index[0].m_RegIndex;
+      IFTBOOL(Stream < DXIL::kNumOutputStreams, DXC_E_INCORRECT_DXBC);
+      CurrentOutputStream = Stream;
+      m_pPR->SetStreamActive(Stream, true);
+      break;
+    }
+    
+    case D3D11_SB_OPCODE_HS_DECLS:
+      break;
+
+    case D3D11_SB_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT:
+      m_pPR->SetInputControlPointCount(Inst.m_InputControlPointCountDecl.InputControlPointCount);
+      break;
+
+    case D3D11_SB_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT:
+      m_pPR->SetOutputControlPointCount(Inst.m_OutputControlPointCountDecl.OutputControlPointCount);
+      break;
+
+    case D3D11_SB_OPCODE_DCL_TESS_DOMAIN:
+      m_pPR->SetTessellatorDomain(DXBC::GetTessellatorDomain(Inst.m_TessellatorDomainDecl.TessellatorDomain));
+      break;
+
+    case D3D11_SB_OPCODE_DCL_TESS_PARTITIONING:
+      m_pPR->SetTessellatorPartitioning(DXBC::GetTessellatorPartitioning(Inst.m_TessellatorPartitioningDecl.TessellatorPartitioning));
+      break;
+
+    case D3D11_SB_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE:
+      m_pPR->SetTessellatorOutputPrimitive(DXBC::GetTessellatorOutputPrimitive(Inst.m_TessellatorOutputPrimitiveDecl.TessellatorOutputPrimitive));
+      break;
+
+    case D3D11_SB_OPCODE_DCL_HS_MAX_TESSFACTOR:
+      m_pPR->SetMaxTessellationFactor(Inst.m_HSMaxTessFactorDecl.MaxTessFactor);
+      break;
+
+    case D3D11_SB_OPCODE_HS_CONTROL_POINT_PHASE:
+      DXASSERT_NOMSG(!m_bControlPointPhase && !bPatchConstantPhase);
+      m_bControlPointPhase = true;
+      break;
+
+    case D3D11_SB_OPCODE_HS_FORK_PHASE:
+    case D3D11_SB_OPCODE_HS_JOIN_PHASE:
+      m_bControlPointPhase = false;
+      bPatchConstantPhase = true;
+      m_PatchConstantPhaseInstanceCounts.push_back(1);
+      break;
+
+    case D3D11_SB_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
+      // Overwrites the count pushed for the current fork phase. An entry only
+      // exists once a fork/join phase token has been seen, so reject streams
+      // that declare a count before any phase instead of calling back() on an
+      // empty container.
+      IFTBOOL(!m_PatchConstantPhaseInstanceCounts.empty(), DXC_E_INCORRECT_DXBC);
+      m_PatchConstantPhaseInstanceCounts.back() = Inst.m_HSForkPhaseInstanceCountDecl.InstanceCount;
+      break;
+
+    case D3D11_SB_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT:
+      // Same as above, for the current join phase.
+      IFTBOOL(!m_PatchConstantPhaseInstanceCounts.empty(), DXC_E_INCORRECT_DXBC);
+      m_PatchConstantPhaseInstanceCounts.back() = Inst.m_HSJoinPhaseInstanceCountDecl.InstanceCount;
+      break;
+
+    case D3D11_SB_OPCODE_DCL_THREAD_GROUP:
+      m_pPR->SetNumThreads(Inst.m_ThreadGroupDecl.x,
+                           Inst.m_ThreadGroupDecl.y,
+                           Inst.m_ThreadGroupDecl.z);
+      break;
+
+    // Thread-group shared memory declaration (raw or structured): record the
+    // entry's stride/count and create an i8-array global variable to back it.
+    case D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW:
+    case D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED: {
+      TGSMEntry E;
+      // Ids are handed out sequentially in declaration order.
+      E.Id = m_TGSMCount++;
+
+      if (Inst.OpCode() == D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW) {
+        // Raw: treated as ByteCount elements of stride 1.
+        E.Stride = 1;
+        E.Count = Inst.m_RawTGSMDecl.ByteCount;
+      } else {
+        // Structured: StructCount elements of StructByteStride bytes each.
+        E.Stride = Inst.m_StructuredTGSMDecl.StructByteStride;
+        E.Count = Inst.m_StructuredTGSMDecl.StructCount;
+      }
+
+      // Declare global variable.
+      // It is an [SizeInBytes x i8] array with internal linkage and an undef
+      // initializer, placed in the DXIL::kTGSMAddrSpace address space and
+      // named "TGSM" followed by the entry id.
+      unsigned SizeInBytes = E.Stride*E.Count;
+      Type *pArrayType = ArrayType::get(Type::getInt8Ty(m_Ctx), SizeInBytes);
+      E.pVar = new GlobalVariable(*m_pModule, pArrayType, 
+                                  false, GlobalValue::InternalLinkage, 
+                                  UndefValue::get(pArrayType), 
+                                  Twine("TGSM") + Twine(E.Id), nullptr, 
+                                  GlobalVariable::NotThreadLocal, DXIL::kTGSMAddrSpace);
+      E.pVar->setAlignment(kRegCompAlignment);
+
+      // Mark GV as being used for LLVM.
+      m_pPR->GetLLVMUsed().push_back(E.pVar);
+
+      // Entries are looked up by the register index of operand 0.
+      m_TGSMMap[Inst.m_Operands[0].m_Index[0].m_RegIndex] = E;
+      break;
+    }
+
+    case D3D11_SB_OPCODE_DCL_GS_INSTANCE_COUNT:
+      m_pPR->SetGSInstanceCount(Inst.m_GSInstanceCountDecl.InstanceCount);
+      break;
+
+    case D3D10_SB_OPCODE_CUSTOMDATA:
+      break;
+
+    case D3D11_SB_OPCODE_DCL_FUNCTION_BODY: {
+      DXASSERT_DXBC(Inst.m_NumOperands == 0);
+      unsigned FBIdx = Inst.m_FunctionBodyDecl.FunctionBodyNumber;
+      m_InterfaceFunctionBodies[FBIdx].pFunc = nullptr;
+      break;
+    }
+
+    case D3D11_SB_OPCODE_DCL_FUNCTION_TABLE: {
+      DXASSERT_DXBC(Inst.m_NumOperands == 0);
+      auto& FnTable = m_FunctionTables[Inst.m_FunctionTableDecl.FunctionTableNumber];
+      FnTable.assign(Inst.m_FunctionTableDecl.pFunctionIdentifiers, Inst.m_FunctionTableDecl.pFunctionIdentifiers + Inst.m_FunctionTableDecl.TableLength);
+      break;
+    }
+
+    case D3D11_SB_OPCODE_DCL_INTERFACE: {
+      DXASSERT_DXBC(Inst.m_NumOperands == 0);
+      auto& Iface = m_Interfaces[Inst.m_InterfaceDecl.InterfaceNumber];
+      Iface.Tables.assign(Inst.m_InterfaceDecl.pFunctionTableIdentifiers, Inst.m_InterfaceDecl.pFunctionTableIdentifiers + Inst.m_InterfaceDecl.TableLength);
+      for (unsigned TableIdx : Iface.Tables) {
+          DXASSERT_DXBC(m_FunctionTables[TableIdx].size() == Inst.m_InterfaceDecl.ExpectedTableSize);
+      }
+      Iface.bDynamicallyIndexed = Inst.m_InterfaceDecl.bDynamicallyIndexed;
+      Iface.NumArrayEntries = Inst.m_InterfaceDecl.ArrayLength;
+      m_NumIfaces = std::max(m_NumIfaces, Inst.m_InterfaceDecl.InterfaceNumber + Iface.NumArrayEntries);
+      InsertInterfacesResourceDecls();
+      break;
+    }
+
+    case D3D10_SB_OPCODE_LABEL: {
+      // A label (or interface function body) ends any hull-shader phase
+      // tracking and gets its own void() LLVM function, created here and
+      // stored in the matching map under the label index.
+      m_bControlPointPhase = false;
+      bPatchConstantPhase = false;
+      DXASSERT_DXBC(Inst.m_NumOperands == 1);
+      DXASSERT_DXBC(Inst.m_Operands[0].m_Type == D3D10_SB_OPERAND_TYPE_LABEL ||
+                    Inst.m_Operands[0].m_Type == D3D11_SB_OPERAND_TYPE_FUNCTION_BODY);
+      // Function signature: void f(), non-variadic.
+      FunctionType *pFuncType = FunctionType::get(Type::getVoidTy(m_Ctx), false);
+      unsigned LabelIdx = Inst.m_Operands[0].m_Index[0].m_RegIndex;
+      LabelEntry Label;
+      // Function bodies and plain labels live in separate maps.
+      const bool IsFb = Inst.m_Operands[0].m_Type == D3D11_SB_OPERAND_TYPE_FUNCTION_BODY;
+      auto& LabelMap = IsFb ? m_InterfaceFunctionBodies : m_Labels;
+      DXASSERT_DXBC((LabelMap.find(LabelIdx) == LabelMap.end()) == !IsFb); // Function bodies should be pre-declared, labels aren't
+      // Internal-linkage function named "dx.fb.<idx>" or "dx.label.<idx>".
+      Label.pFunc = Function::Create(pFuncType, GlobalValue::LinkageTypes::InternalLinkage,
+                                     StringRef(IsFb ? "dx.fb." : "dx.label.") + Twine(LabelIdx), m_pModule.get());
+      Label.pFunc->setCallingConv(CallingConv::C);
+      LabelMap[LabelIdx] = Label;
+      break;
+    }
+
+    default:
+      break;
+    }
+  }
+}
+
+void DxbcConverter::ConvertInstructions(D3D10ShaderBinary::CShaderCodeParser &Parser) {
+  if (m_pPR->GetShaderModel()->IsGS()) {
+    // Set GS active stream mask.
+    if (m_pPR->GetActiveStreamMask() == 0
+        && !m_pPR->GetOutputSignature().GetElements().empty())
+      m_pPR->SetStreamActive(0, true);
+
+    // Make sure GS instance count is at least 1
+    if (m_pPR->GetGSInstanceCount() == 0)
+      m_pPR->SetGSInstanceCount(1);
+  }
+
+  // Add entry function declaration.
+  m_pPR->SetEntryFunctionName("main");
+  FunctionType *pEntryFuncType = FunctionType::get(Type::getVoidTy(m_Ctx), false);
+  Function *pFunction = Function::Create(pEntryFuncType, GlobalValue::LinkageTypes::ExternalLinkage, 
+                                         m_pPR->GetEntryFunctionName(), m_pModule.get());
+  pFunction->setCallingConv(CallingConv::C);
+  m_pPR->SetEntryFunction(pFunction);
+
+  // Create main entry function.
+  BasicBlock *pBB = BasicBlock::Create(m_Ctx, "entry", pFunction);
+  m_pBuilder = std::make_unique< IRBuilder<> >(pBB);
+
+  FastMathFlags FMF;
+  if (!m_pPR->m_ShaderFlags.GetDisableMathRefactoring()) {
+    FMF.setUnsafeAlgebra();
+  }
+  m_pBuilder->SetFastMathFlags(FMF);
+
+  // Empty instruction stream.
+  if (Parser.EndOfShader()) {
+    m_pBuilder->CreateRetVoid();
+    return;
+  }
+
+  m_pUnusedF32 = UndefValue::get(Type::getFloatTy(m_Ctx));
+  m_pUnusedI32 = UndefValue::get(Type::getInt32Ty(m_Ctx));
+
+  // Create entry function scope.
+  DXASSERT_NOMSG(m_ScopeStack.IsEmpty());
+  (void)m_ScopeStack.Push(Scope::Function, nullptr);
+  m_ScopeStack.Top().SetEntry(true);
+
+  DeclareIndexableRegisters();
+
+  // Parse DXBC instructions and emit DXIL equivalents.
+  Value *pHullLoopInductionVar = nullptr;
+  m_bControlPointPhase = false;
+  bool bMustCloseHullLoop = false;
+  m_bPatchConstantPhase = false;
+  bool bInsertResourceHandles = true;
+  unsigned ForkJoinPhaseIndex = 0;
+  D3D10ShaderBinary::CInstruction Inst;
+  bool bPasshThroughCP = false;
+  bool bDoneParsing = false;
+  for (;;) {
+    AdvanceDxbcInstructionStream(Parser, Inst, bDoneParsing);
+
+    // Terminate HS phase (HullLoop), if necessary.
+    // Only considered while converting the patch-constant phase. The loop is
+    // closed when the stream ended, when a close was requested through
+    // bMustCloseHullLoop, or when the current instruction starts a new
+    // fork/join phase or a label.
+    if (m_bPatchConstantPhase) {
+      bool bTerminateHullLoop = false;
+
+      if (bDoneParsing || bMustCloseHullLoop) {
+        bTerminateHullLoop = true;
+      } else {
+        switch (Inst.OpCode()) {
+        case D3D11_SB_OPCODE_HS_FORK_PHASE:
+        case D3D11_SB_OPCODE_HS_JOIN_PHASE:
+        case D3D10_SB_OPCODE_LABEL:
+          bTerminateHullLoop = true;
+          break;
+        }
+      }
+
+      if (bTerminateHullLoop) {
+        // The innermost open scope must be the HullLoop being closed.
+        IFTBOOL(m_ScopeStack.Top().Kind == Scope::HullLoop, E_FAIL);
+        // Scope of the fork/join phase loop that is being closed.
+        Scope &HullScope = m_ScopeStack.Top();
+
+        // Increment HullLoop instance ID.
+        // (load the induction variable, add 1, store it back)
+        Value *pOldInstID = m_pBuilder->CreateLoad(HullScope.pInductionVar);
+        Value *pNewInstID = m_pBuilder->CreateAdd(pOldInstID, m_pOP->GetU32Const(1));
+        (void)m_pBuilder->CreateStore(pNewInstID, HullScope.pInductionVar);
+
+        // Insert backedge cbranch to HullLoop and AfterHullLoop BBs.
+        // Loop again while the new instance ID is (unsigned) below the trip
+        // count; otherwise continue in the post-scope block, which is
+        // appended to the patch constant function and becomes the new
+        // insertion point.
+        Value *pCond = m_pBuilder->CreateICmpULT(pNewInstID, m_pOP->GetU32Const(HullScope.HullLoopTripCount));
+        m_pBuilder->CreateCondBr(pCond, HullScope.pHullLoopBB, HullScope.pPostScopeBB);
+        m_pPR->GetPatchConstantFunction()->getBasicBlockList().push_back(HullScope.pPostScopeBB);
+        m_pBuilder->SetInsertPoint(HullScope.pPostScopeBB);
+        m_ScopeStack.Pop();
+
+        // Skip dead instructions to the next phase, label or EOS.
+        // On exit, Inst is that phase/label instruction unless parsing ended.
+        for( ; !bDoneParsing ; ) {
+          if (Inst.OpCode() == D3D11_SB_OPCODE_HS_FORK_PHASE ||
+              Inst.OpCode() == D3D11_SB_OPCODE_HS_JOIN_PHASE ||
+              Inst.OpCode() == D3D10_SB_OPCODE_LABEL)
+            break;
+
+          AdvanceDxbcInstructionStream(Parser, Inst, bDoneParsing);
+        }
+      }
+
+      // The close request, if any, has been consumed.
+      bMustCloseHullLoop = false;
+    }
+
+    // Terminate function, if necessary.
+    // The current function ends when the stream is exhausted, at a label, or
+    // at a fork/join phase token seen while not already in the
+    // patch-constant phase.
+    {
+      bool bTerminateFunc = false;
+      if (bDoneParsing) {
+        bTerminateFunc = true;
+      } else {
+        switch (Inst.OpCode()) {
+        case D3D11_SB_OPCODE_HS_FORK_PHASE:
+        case D3D11_SB_OPCODE_HS_JOIN_PHASE:
+          if (!m_bPatchConstantPhase)
+            bTerminateFunc = true;
+          break;
+        case D3D10_SB_OPCODE_LABEL:
+          bTerminateFunc = true;
+          break;
+        }
+      }
+
+      if (bTerminateFunc) {
+        Scope &Scope = m_ScopeStack.FindParentFunction();
+        IFTBOOL(Scope.Kind == Scope::Function, DXC_E_INCORRECT_DXBC);
+        m_pBuilder->CreateRetVoid();
+        m_ScopeStack.Pop();
+        // Every scope must be closed once the function scope is popped.
+        // This has to be a boolean check: handing the bool to an HRESULT
+        // check turns true/false into 1/0, and neither value is a failure
+        // code, so such a check could never fire.
+        IFTBOOL(m_ScopeStack.IsEmpty(), DXC_E_INCORRECT_DXBC);
+        m_bPatchConstantPhase = false;
+      }
+    }
+
+    if (bDoneParsing)
+      break;
+
+    m_PreciseMask = CMask(Inst.GetPreciseMask());
+
+    // Fix up output register masks.
+    // DXBC instruction conversion relies on the output mask(s) determining 
+    // what components need to be written.
+    // Some output operand types have write mask that is 0 -- fix this.
+    for (unsigned i = 0; i < std::min(Inst.m_NumOperands, (UINT)2); i++) {
+      D3D10ShaderBinary::COperandBase &O = Inst.m_Operands[i];
+      switch (O.m_Type) {
+      case D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH:
+      case D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL:
+      case D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL:
+      case D3D11_SB_OPERAND_TYPE_OUTPUT_STENCIL_REF:
+      case D3D10_SB_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
+        DXASSERT_DXBC(O.m_WriteMask == 0);
+        O.SetMask(D3D10_SB_OPERAND_4_COMPONENT_MASK_X);
+        break;
+      }
+    }
+
+    if (bInsertResourceHandles) {
+      InsertSM50ResourceHandles();
+      bInsertResourceHandles = false;
+    }
+
+    switch (Inst.OpCode()) {
+      //
+      // Declarations.
+      //
+    case D3D10_SB_OPCODE_DCL_RESOURCE:
+    case D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER:
+    case D3D10_SB_OPCODE_DCL_SAMPLER:
+    case D3D10_SB_OPCODE_DCL_INDEX_RANGE:
+    case D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY:
+    case D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE:
+    case D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
+    case D3D10_SB_OPCODE_DCL_INPUT:
+    case D3D10_SB_OPCODE_DCL_INPUT_SGV:
+    case D3D10_SB_OPCODE_DCL_INPUT_SIV:
+    case D3D10_SB_OPCODE_DCL_INPUT_PS:
+    case D3D10_SB_OPCODE_DCL_INPUT_PS_SGV:
+    case D3D10_SB_OPCODE_DCL_INPUT_PS_SIV:
+    case D3D10_SB_OPCODE_DCL_OUTPUT:
+    case D3D10_SB_OPCODE_DCL_OUTPUT_SGV:
+    case D3D10_SB_OPCODE_DCL_OUTPUT_SIV:
+    case D3D10_SB_OPCODE_DCL_TEMPS:
+    case D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP:
+      break;
+
+    case D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS:
+      break;
+
+    case D3D11_SB_OPCODE_DCL_STREAM:
+    case D3D11_SB_OPCODE_DCL_FUNCTION_BODY:
+    case D3D11_SB_OPCODE_DCL_FUNCTION_TABLE:
+    case D3D11_SB_OPCODE_DCL_INTERFACE:
+    
+    case D3D11_SB_OPCODE_HS_DECLS:
+
+    case D3D11_SB_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT:
+    case D3D11_SB_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT:
+    case D3D11_SB_OPCODE_DCL_TESS_DOMAIN:
+    case D3D11_SB_OPCODE_DCL_TESS_PARTITIONING:
+    case D3D11_SB_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE:
+    case D3D11_SB_OPCODE_DCL_HS_MAX_TESSFACTOR:
+
+    case D3D11_SB_OPCODE_DCL_THREAD_GROUP:
+    case D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED:
+    case D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW:
+    case D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED:
+    case D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW:
+    case D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED:
+    case D3D11_SB_OPCODE_DCL_RESOURCE_RAW:
+    case D3D11_SB_OPCODE_DCL_RESOURCE_STRUCTURED:
+
+    case D3D11_SB_OPCODE_DCL_GS_INSTANCE_COUNT:
+      break;
+
+      //
+      // Immediate constant buffer.
+      //
+    case D3D10_SB_OPCODE_CUSTOMDATA:
+      if (Inst.m_CustomData.Type == D3D10_SB_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER) {
+        unsigned Size = Inst.m_CustomData.DataSizeInBytes >> 2;
+        DXASSERT_DXBC(m_pIcbGV == nullptr && Inst.m_CustomData.DataSizeInBytes == Size*4);
+
+        llvm::Constant *pIcbData = ConstantDataArray::get(m_Ctx, ArrayRef<float>((float*)Inst.m_CustomData.pData, Size));
+        m_pIcbGV = new GlobalVariable(*m_pModule, pIcbData->getType(), true, GlobalValue::InternalLinkage,
+                                      pIcbData, "dx.icb", nullptr, 
+                                      GlobalVariable::NotThreadLocal, DXIL::kImmediateCBufferAddrSpace);
+      }
+      break;
+
+      //
+      // Mov, movc, swapc, dmov, dmovc.
+      //
+    case D3D10_SB_OPCODE_MOV: {
+      CMask WriteMask = CMask::FromDXBC(Inst.m_Operands[0].m_WriteMask);
+      CompType DstType = InferOperandType(Inst, 0, WriteMask);
+      CompType SrcType = InferOperandType(Inst, 1, WriteMask);
+
+      // For mov, movc, and swapc, use integer operation type unless 
+      // operand modifiers imply floating point.
+      CompType OperationType = CompType::getI32();
+
+      if (!DstType.IsInvalid())
+        OperationType = DstType.GetBaseCompType();
+      else if (!SrcType.IsInvalid())
+        OperationType = SrcType;
+
+      if (Inst.m_Operands[1].Modifier() != D3D10_SB_OPERAND_MODIFIER_NONE || Inst.m_bSaturate) {
+        OperationType = CompType::getF32();
+      }
+
+      OperandValue In;
+      LoadOperand(In, Inst, 1, WriteMask, OperationType);
+      StoreOperand(In, Inst, 0, WriteMask, OperationType);
+      break;
+    }
+
+    case D3D10_SB_OPCODE_MOVC: {
+      CMask WriteMask = CMask::FromDXBC(Inst.m_Operands[0].m_WriteMask);
+      CompType DstType = InferOperandType(Inst, 0, WriteMask);
+      CompType Src2Type = InferOperandType(Inst, 2, WriteMask);
+      CompType Src3Type = InferOperandType(Inst, 3, WriteMask);
+
+      CompType OperationType = CompType::getI32();
+
+      if (Src2Type == Src3Type && !Src2Type.IsInvalid())
+        OperationType = Src2Type;
+      else if (!DstType.IsInvalid())
+        OperationType = DstType.GetBaseCompType();
+      else if (!Src2Type.IsInvalid())
+        OperationType = Src2Type;
+      else if (!Src3Type.IsInvalid())
+        OperationType = Src3Type;
+
+      if (Inst.m_Operands[2].Modifier() != D3D10_SB_OPERAND_MODIFIER_NONE ||
+          Inst.m_Operands[3].Modifier() != D3D10_SB_OPERAND_MODIFIER_NONE ||
+          Inst.m_bSaturate) {
+        OperationType = CompType::getF32();
+      }
+
+      OperandValue In1, In2, In3, Out;
+      LoadOperand(In1, Inst, 1, WriteMask, CompType::getI1());
+      LoadOperand(In2, Inst, 2, WriteMask, OperationType);
+      LoadOperand(In3, Inst, 3, WriteMask, OperationType);
+
+      for (BYTE c = 0; c < DXBC::kWidth; c++) {
+        if (!WriteMask.IsSet(c)) continue;
+        Out[c] = m_pBuilder->CreateSelect(In1[c], In2[c], In3[c]);
+      }
+
+      StoreOperand(Out, Inst, 0, WriteMask, OperationType);
+      break;
+    }
+
+    case D3D11_SB_OPCODE_SWAPC: {
+      CMask WriteMask = CMask::FromDXBC(Inst.m_Operands[0].m_WriteMask | Inst.m_Operands[1].m_WriteMask);
+      CMask Dst1Mask = CMask::FromDXBC(Inst.m_Operands[0].m_WriteMask);
+      CMask Dst2Mask = CMask::FromDXBC(Inst.m_Operands[1].m_WriteMask);
+      CompType Dst1Type = InferOperandType(Inst, 0, WriteMask);
+      CompType Dst2Type = InferOperandType(Inst, 1, WriteMask);
+      CompType Src2Type = InferOperandType(Inst, 3, WriteMask);
+      CompType Src3Type = InferOperandType(Inst, 4, WriteMask);
+
+      CompType OperationType = CompType::getI32();
+
+      if (Src2Type == Src3Type && !Src2Type.IsInvalid())
+        OperationType = Src2Type;
+      else if (!Dst1Type.IsInvalid())
+        OperationType = Dst1Type.GetBaseCompType();
+      else if (!Dst2Type.IsInvalid())
+        OperationType = Dst2Type.GetBaseCompType();
+      else if (!Src2Type.IsInvalid())
+        OperationType = Src2Type;
+      else if (!Src3Type.IsInvalid())
+        OperationType = Src3Type;
+
+      if (Inst.m_Operands[3].Modifier() != D3D10_SB_OPERAND_MODIFIER_NONE ||
+          Inst.m_Operands[4].Modifier() != D3D10_SB_OPERAND_MODIFIER_NONE ||
+          Inst.m_bSaturate) {
+        OperationType = CompType::getF32();
+      }
+
+      OperandValue In1, In2, In3, Out1, Out2;
+      LoadOperand(In1, Inst, 2, WriteMask, CompType::getI1());
+      LoadOperand(In2, Inst, 3, WriteMask, OperationType);
+      LoadOperand(In3, Inst, 4, WriteMask, OperationType);
+
+      for (BYTE c = 0; c < DXBC::kWidth; c++) {
+        if (!Dst1Mask.IsSet(c)) continue;
+        Out1[c] = m_pBuilder->CreateSelect(In1[c], In3[c], In2[c]);
+      }
+      StoreOperand(Out1, Inst, 0, Dst1Mask, OperationType);
+
+      for (BYTE c = 0; c < DXBC::kWidth; c++) {
+        if (!Dst2Mask.IsSet(c)) continue;
+        Out2[c] = m_pBuilder->CreateSelect(In1[c], In2[c], In3[c]);
+      }
+      StoreOperand(Out2, Inst, 1, Dst2Mask, OperationType);
+
+      break;
+    }
+
+      //
+      // Floating point unary.
+      //
+    case D3D10_SB_OPCODE_EXP:           ConvertUnary(OP::OpCode::Exp,         CompType::getF32(), Inst); break;
+    case D3D10_SB_OPCODE_FRC:           ConvertUnary(OP::OpCode::Frc,         CompType::getF32(), Inst); break;
+    case D3D10_SB_OPCODE_LOG:           ConvertUnary(OP::OpCode::Log,         CompType::getF32(), Inst); break;
+    case D3D10_SB_OPCODE_SQRT:          ConvertUnary(OP::OpCode::Sqrt,        CompType::getF32(), Inst); break;
+    case D3D10_SB_OPCODE_RSQ:           ConvertUnary(OP::OpCode::Rsqrt,       CompType::getF32(), Inst); break;
+    case D3D10_SB_OPCODE_ROUND_NE:      ConvertUnary(OP::OpCode::Round_ne,    CompType::getF32(), Inst); break;
+    case D3D10_SB_OPCODE_ROUND_NI:      ConvertUnary(OP::OpCode::Round_ni,    CompType::getF32(), Inst); break;
+    case D3D10_SB_OPCODE_ROUND_PI:      ConvertUnary(OP::OpCode::Round_pi,    CompType::getF32(), Inst); break;
+    case D3D10_SB_OPCODE_ROUND_Z:       ConvertUnary(OP::OpCode::Round_z,     CompType::getF32(), Inst); break;
+
+    case D3D11_SB_OPCODE_RCP: {
+      CMask WriteMask = CMask::FromDXBC(Inst.m_Operands[0].m_WriteMask);
+      CompType OperationType = DXBC::GetCompTypeWithMinPrec(CompType::getF32(), Inst.m_Operands[0].m_MinPrecision);
+
+      OperandValue In, Out;
+      LoadOperand(In, Inst, 1, WriteMask, OperationType);
+      Value *One = m_pOP->GetFloatConst(1.0f);
+      if (OperationType.Is16Bit())
+        One = ConstantFP::get(m_pBuilder->getHalfTy(), 1.0);
+      for (BYTE c = 0; c < DXBC::kWidth; c++) {
+        if (!WriteMask.IsSet(c)) continue;
+
+        Out[c] = m_pBuilder->CreateBinOp(Instruction::BinaryOps::FDiv, One, In[c]);
+      }
+
+      StoreOperand(Out, Inst, 0, WriteMask, OperationType);
+      break;
+    }
+
+    case D3D10_SB_OPCODE_SINCOS: // Operand 0 = sin destination, operand 1 = cos destination, operand 2 = source; a destination is skipped when it is a NULL operand.
+    {
+      CMask WriteMaskSin; // Only assigned when the sin destination is not NULL.
+      CMask WriteMaskCos; // Only assigned when the cos destination is not NULL.
+      CompType OperationType; // Taken from the non-NULL destination(s); see the assert below.
+      if (Inst.m_Operands[0].m_Type != D3D10_SB_OPERAND_TYPE_NULL) {
+        WriteMaskSin = CMask::FromDXBC(Inst.m_Operands[0].m_WriteMask);
+        OperationType = DXBC::GetCompTypeWithMinPrec(CompType::getF32(), Inst.m_Operands[0].m_MinPrecision);
+      }
+      if (Inst.m_Operands[1].m_Type != D3D10_SB_OPERAND_TYPE_NULL) {
+        WriteMaskCos = CMask::FromDXBC(Inst.m_Operands[1].m_WriteMask);
+        CompType OperationTypeCos = DXBC::GetCompTypeWithMinPrec(CompType::getF32(), Inst.m_Operands[1].m_MinPrecision);
+        DXASSERT_DXBC(OperationType.GetKind() == CompType::Kind::Invalid || OperationType == OperationTypeCos); // Passes when OperationType is still Kind::Invalid or already equals the cos type.
+        OperationType = OperationTypeCos;
+      }
+      CMask WriteMaskAll = WriteMaskSin | WriteMaskCos; // Union of both masks: the source is loaded once for both results.
+      Type *pOperationType = OperationType.GetLLVMType(m_Ctx); // NOTE(review): OperationType is never assigned if both destinations are NULL - confirm that case cannot reach here.
+
+      OperandValue In;
+      LoadOperand(In, Inst, 2, WriteMaskAll, OperationType);
+
+      if (Inst.m_Operands[0].m_Type != D3D10_SB_OPERAND_TYPE_NULL) {
+        OperandValue Out;
+        Function *pFunc = m_pOP->GetOpFunc(OP::OpCode::Sin, pOperationType);
+        for (BYTE c = 0; c < DXBC::kWidth; c++) {
+          if (!WriteMaskSin.IsSet(c)) continue;
+
+          Out[c] = m_pBuilder->CreateCall(pFunc, { m_pOP->GetU32Const((unsigned)OP::OpCode::Sin), In[c] }); // One Sin call per enabled component; first argument is the opcode constant.
+        }
+
+        StoreOperand(Out, Inst, 0, WriteMaskSin, OperationType);
+      }
+      if (Inst.m_Operands[1].m_Type != D3D10_SB_OPERAND_TYPE_NULL) {
+        OperandValue Out;
+        Function *pFunc = m_pOP->GetOpFunc(OP::OpCode::Cos, pOperationType);
+        for (BYTE c = 0; c < DXBC::kWidth; c++) {
+          if (!WriteMaskCos.IsSet(c)) continue;
+
+          Out[c] = m_pBuilder->CreateCall(pFunc, { m_pOP->GetU32Const((unsigned)OP::OpCode::Cos), In[c] }); // One Cos call per enabled component, reading the same loaded source.
+        }
+
+        StoreOperand(Out, Inst, 1, WriteMaskCos, OperationType);
+      }
+      break;
+    }
+
+      //
+      // Integer unary.
+      //
+    case D3D11_SB_OPCODE_BFREV:         ConvertUnary(OP::OpCode::Bfrev,       CompType::getU32(), Inst); break;
+    case D3D11_SB_OPCODE_COUNTBITS:     ConvertUnary(OP::OpCode::Countbits,   CompType::getU32(), Inst); break;
+    case D3D11_SB_OPCODE_FIRSTBIT_HI:   ConvertUnary(OP::OpCode::FirstbitHi,  CompType::getU32(), Inst); break;
+    case D3D11_SB_OPCODE_FIRSTBIT_LO:   ConvertUnary(OP::OpCode::FirstbitLo,  CompType::getU32(), Inst); break;
+    case D3D11_SB_OPCODE_FIRSTBIT_SHI:  ConvertUnary(OP::OpCode::FirstbitSHi, CompType::getI32(), Inst); break;
+
+    case D3D10_SB_OPCODE_INEG: {
+      // ineg dst, src: integer negation of every enabled destination component.
+      const unsigned uOpDst = 0;
+      const unsigned uOpSrc = 1;
+      CMask DstMask = CMask::FromDXBC(Inst.m_Operands[uOpDst].m_WriteMask);
+      CompType IntType = DXBC::GetCompTypeWithMinPrec(CompType::getI32(), Inst.m_Operands[uOpDst].m_MinPrecision);
+
+      OperandValue Src, Negated;
+      LoadOperand(Src, Inst, uOpSrc, DstMask, IntType);
+
+      for (BYTE c = 0; c < DXBC::kWidth; c++) {
+        if (DstMask.IsSet(c)) {
+          Negated[c] = m_pBuilder->CreateNeg(Src[c]);
+        }
+      }
+
+      StoreOperand(Negated, Inst, uOpDst, DstMask, IntType);
+      break;
+    }
+
+    case D3D10_SB_OPCODE_NOT: {
+      // not dst, src: bitwise complement of every enabled destination component.
+      const unsigned uOpDst = 0;
+      const unsigned uOpSrc = 1;
+      CMask DstMask = CMask::FromDXBC(Inst.m_Operands[uOpDst].m_WriteMask);
+      CompType IntType = DXBC::GetCompTypeWithMinPrec(CompType::getI32(), Inst.m_Operands[uOpDst].m_MinPrecision);
+
+      OperandValue Src, Inverted;
+      LoadOperand(Src, Inst, uOpSrc, DstMask, IntType);
+
+      for (BYTE c = 0; c < DXBC::kWidth; c++) {
+        if (DstMask.IsSet(c)) {
+          Inverted[c] = m_pBuilder->CreateNot(Src[c]);
+        }
+      }
+
+      StoreOperand(Inverted, Inst, uOpDst, DstMask, IntType);
+      break;
+    }
+
+      //
+      // Floating point binary.
+      //
+    case D3D10_SB_OPCODE_ADD:           ConvertBinary(Instruction::FAdd,  CompType::getF32(), Inst); break;
+    case D3D10_SB_OPCODE_MUL:           ConvertBinary(Instruction::FMul,  CompType::getF32(), Inst); break;
+    case D3D10_SB_OPCODE_DIV:           ConvertBinary(Instruction::FDiv,  CompType::getF32(), Inst); break;
+    case D3D10_SB_OPCODE_MAX:           ConvertBinary(OP::OpCode::FMax,           CompType::getF32(), Inst); break;
+    case D3D10_SB_OPCODE_MIN:           ConvertBinary(OP::OpCode::FMin,           CompType::getF32(), Inst); break;
+    
+      //
+      // Integer binary.
+      //
+    case D3D10_SB_OPCODE_IADD:          ConvertBinary(Instruction::Add,   CompType::getI32(), Inst); break;
+    case D3D10_SB_OPCODE_IMAX:          ConvertBinary(OP::OpCode::IMax,           CompType::getI32(), Inst); break;
+    case D3D10_SB_OPCODE_IMIN:          ConvertBinary(OP::OpCode::IMin,           CompType::getI32(), Inst); break;
+    case D3D10_SB_OPCODE_UMAX:          ConvertBinary(OP::OpCode::UMax,           CompType::getU32(), Inst); break;
+    case D3D10_SB_OPCODE_UMIN:          ConvertBinary(OP::OpCode::UMin,           CompType::getU32(), Inst); break;
+
+    case D3D10_SB_OPCODE_AND:           ConvertBinary(Instruction::And,   CompType::getI32(), Inst); break;
+    case D3D10_SB_OPCODE_OR:            ConvertBinary(Instruction::Or,    CompType::getI32(), Inst); break;
+    case D3D10_SB_OPCODE_XOR:           ConvertBinary(Instruction::Xor,   CompType::getI32(), Inst); break;
+
+    case D3D10_SB_OPCODE_ISHL:          ConvertBinary(Instruction::Shl,   CompType::getI32(), Inst); break;
+    case D3D10_SB_OPCODE_ISHR:          ConvertBinary(Instruction::AShr,  CompType::getI32(), Inst); break;
+    case D3D10_SB_OPCODE_USHR:          ConvertBinary(Instruction::LShr,  CompType::getI32(), Inst); break;
+
+      //
+      // Integer binary with two outputs.
+      //
+    case D3D10_SB_OPCODE_IMUL:          ConvertBinaryWithTwoOuts(OP::OpCode::IMul, Inst); break;
+    case D3D10_SB_OPCODE_UMUL:          ConvertBinaryWithTwoOuts(OP::OpCode::UMul, Inst); break;
+    case D3D10_SB_OPCODE_UDIV:          ConvertBinaryWithTwoOuts(OP::OpCode::UDiv, Inst); break;
+      
+      //
+      // Integer binary with carry.
+      //
+    case D3D11_SB_OPCODE_UADDC:         ConvertBinaryWithCarry(OP::OpCode::UAddc, Inst); break;
+    case D3D11_SB_OPCODE_USUBB:         ConvertBinaryWithCarry(OP::OpCode::USubb, Inst); break;
+
+      //
+      // Floating point tertiary.
+      //
+    case D3D10_SB_OPCODE_MAD:           ConvertTertiary(OP::OpCode::FMad,         CompType::getF32(), Inst); break;
+
+      //
+      // Integer tertiary.
+      //
+    case D3D10_SB_OPCODE_IMAD:          ConvertTertiary(OP::OpCode::IMad,         CompType::getI32(), Inst); break;
+    case D3D10_SB_OPCODE_UMAD:          ConvertTertiary(OP::OpCode::UMad,         CompType::getI32(), Inst); break;
+    case D3D11_1_SB_OPCODE_MSAD:        ConvertTertiary(OP::OpCode::Msad,         CompType::getI32(), Inst); break;
+    case D3D11_SB_OPCODE_IBFE:          ConvertTertiary(OP::OpCode::Ibfe,         CompType::getI32(), Inst); break;
+    case D3D11_SB_OPCODE_UBFE:          ConvertTertiary(OP::OpCode::Ubfe,         CompType::getI32(), Inst); break;
+
+      //
+      // Quaternary int.
+      //
+    case D3D11_SB_OPCODE_BFI:           ConvertQuaternary(OP::OpCode::Bfi,        CompType::getI32(), Inst); break;
+
+      //
+      // Logical comparison.
+      //
+    case D3D10_SB_OPCODE_EQ:            ConvertComparison(CmpInst::FCMP_OEQ, CompType::getF32(), Inst); break;
+    case D3D10_SB_OPCODE_NE:            ConvertComparison(CmpInst::FCMP_UNE, CompType::getF32(), Inst); break;
+    case D3D10_SB_OPCODE_LT:            ConvertComparison(CmpInst::FCMP_OLT, CompType::getF32(), Inst); break;
+    case D3D10_SB_OPCODE_GE:            ConvertComparison(CmpInst::FCMP_OGE, CompType::getF32(), Inst); break;
+
+    case D3D10_SB_OPCODE_IEQ:           ConvertComparison(CmpInst::ICMP_EQ,  CompType::getI32(), Inst); break;
+    case D3D10_SB_OPCODE_INE:           ConvertComparison(CmpInst::ICMP_NE,  CompType::getI32(), Inst); break;
+    case D3D10_SB_OPCODE_ILT:           ConvertComparison(CmpInst::ICMP_SLT, CompType::getI32(), Inst); break;
+    case D3D10_SB_OPCODE_IGE:           ConvertComparison(CmpInst::ICMP_SGE, CompType::getI32(), Inst); break;
+    case D3D10_SB_OPCODE_ULT:           ConvertComparison(CmpInst::ICMP_ULT, CompType::getI32(), Inst); break;
+    case D3D10_SB_OPCODE_UGE:           ConvertComparison(CmpInst::ICMP_UGE, CompType::getI32(), Inst); break;
+
+      //
+      // Dot product.
+      //
+    case D3D10_SB_OPCODE_DP2:           ConvertDotProduct(OP::OpCode::Dot2, 2, CMask::MakeMask(1,1,0,0), Inst); break;
+    case D3D10_SB_OPCODE_DP3:           ConvertDotProduct(OP::OpCode::Dot3, 3, CMask::MakeMask(1,1,1,0), Inst); break;
+    case D3D10_SB_OPCODE_DP4:           ConvertDotProduct(OP::OpCode::Dot4, 4, CMask::MakeMask(1,1,1,1), Inst); break;
+
+      //
+      // Type conversions.
+      //
+    case D3D10_SB_OPCODE_ITOF:          ConvertCast(CompType::getI32(), CompType::getF32(), Inst); break;
+    case D3D10_SB_OPCODE_UTOF:          ConvertCast(CompType::getU32(), CompType::getF32(), Inst); break;
+    case D3D10_SB_OPCODE_FTOI:          ConvertCast(CompType::getF32(), CompType::getI32(), Inst); break;
+    case D3D10_SB_OPCODE_FTOU:          ConvertCast(CompType::getF32(), CompType::getU32(), Inst); break;
+
+    case D3D11_SB_OPCODE_F32TOF16: {
+      // f32tof16 dst, src: each enabled component is passed through the
+      // LegacyF32ToF16 DXIL op; the source is read as f32 and the result is
+      // stored as u32.
+      const unsigned uOpDst = 0;
+      const unsigned uOpSrc = 1;
+      CMask DstMask = CMask::FromDXBC(Inst.m_Operands[uOpDst].m_WriteMask);
+
+      // Nothing is emitted when no destination component is written.
+      if (DstMask.IsZero()) break;
+
+      OperandValue Src, Converted;
+      LoadOperand(Src, Inst, uOpSrc, DstMask, CompType::getF32());
+
+      const OP::OpCode ConvOp = OP::OpCode::LegacyF32ToF16;
+      Function *pConvFunc = m_pOP->GetOpFunc(ConvOp, Type::getVoidTy(m_Ctx));
+
+      for (BYTE c = 0; c < DXBC::kWidth; c++) {
+        if (!DstMask.IsSet(c)) continue;
+
+        // Call arguments: opcode constant, then the component value.
+        Value *CallArgs[] = { m_pOP->GetU32Const((unsigned)ConvOp), Src[c] };
+        Converted[c] = m_pBuilder->CreateCall(pConvFunc, CallArgs);
+      }
+
+      StoreOperand(Converted, Inst, uOpDst, DstMask, CompType::getU32());
+      break;
+    }
+
+    case D3D11_SB_OPCODE_F16TOF32: {
+      // f16tof32 dst, src: each enabled component is read as u32, converted
+      // with the LegacyF16ToF32 DXIL op and stored as f32. Source abs/neg
+      // modifiers are not applied during the load; they are applied to the
+      // converted result instead.
+      const unsigned DstIdx = 0;
+      const unsigned SrcIdx = 1;
+      CMask WriteMask = CMask::FromDXBC(Inst.m_Operands[DstIdx].m_WriteMask);
+
+      if (!WriteMask.IsZero()) {
+        OperandValue In, Out;
+        // Clear the source modifier only for the duration of the load, then put
+        // it back so the instruction record is left exactly as it was decoded.
+        const D3D10_SB_OPERAND_MODIFIER SrcModifier = Inst.m_Operands[SrcIdx].m_Modifier;
+        Inst.m_Operands[SrcIdx].m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE;
+        LoadOperand(In, Inst, SrcIdx, WriteMask, CompType::getU32());
+        Inst.m_Operands[SrcIdx].m_Modifier = SrcModifier;
+
+        OP::OpCode OpCode = OP::OpCode::LegacyF16ToF32;
+        CompType DstType = CompType::getF32();
+        Function *F = m_pOP->GetOpFunc(OpCode, Type::getVoidTy(m_Ctx));
+
+        for (BYTE c = 0; c < DXBC::kWidth; c++) {
+          if (!WriteMask.IsSet(c)) continue;
+
+          Value *Args[2];
+          Args[0] = m_pOP->GetU32Const((unsigned)OpCode);
+          Args[1] = In[c];
+          Value *pResult = m_pBuilder->CreateCall(F, Args);
+
+          // Special-case: propagate source operand modifiers to result.
+          // abs is applied first (FAbs DXIL op), then neg (LLVM fneg).
+          if (SrcModifier & D3D10_SB_OPERAND_MODIFIER_ABS) {
+            Function *Fabs = m_pOP->GetOpFunc(OP::OpCode::FAbs, pResult->getType());
+            Value *AbsArgs[2];
+            AbsArgs[0] = m_pOP->GetU32Const((unsigned)OP::OpCode::FAbs);
+            AbsArgs[1] = pResult;
+            pResult = m_pBuilder->CreateCall(Fabs, AbsArgs);
+          }
+          if (SrcModifier & D3D10_SB_OPERAND_MODIFIER_NEG) {
+            pResult = MarkPrecise(m_pBuilder->CreateFNeg(MarkPrecise(pResult, c)), c);
+          }
+
+          Out[c] = pResult;
+        }
+
+        StoreOperand(Out, Inst, DstIdx, WriteMask, DstType);
+      }
+      break;
+    }
+
+      //
+      // Double-precision operations.
+      //
+    case D3D11_SB_OPCODE_DADD:          ConvertBinary(Instruction::FAdd, CompType::getF64(), Inst); break;
+    case D3D11_SB_OPCODE_DMAX:          ConvertBinary(OP::OpCode::FMax,          CompType::getF64(), Inst); break;
+    case D3D11_SB_OPCODE_DMIN:          ConvertBinary(OP::OpCode::FMin,          CompType::getF64(), Inst); break;
+    case D3D11_SB_OPCODE_DMUL:          ConvertBinary(Instruction::FMul, CompType::getF64(), Inst); break;
+    case D3D11_1_SB_OPCODE_DDIV:        ConvertBinary(Instruction::FDiv, CompType::getF64(), Inst); break;
+
+    case D3D11_1_SB_OPCODE_DFMA:        ConvertTertiary(OP::OpCode::Fma,          CompType::getF64(), Inst); break;
+
+    case D3D11_SB_OPCODE_DEQ:           ConvertComparison(CmpInst::FCMP_OEQ, CompType::getF64(), Inst); break;
+    case D3D11_SB_OPCODE_DGE:           ConvertComparison(CmpInst::FCMP_OGE, CompType::getF64(), Inst); break;
+    case D3D11_SB_OPCODE_DLT:           ConvertComparison(CmpInst::FCMP_OLT, CompType::getF64(), Inst); break;
+    case D3D11_SB_OPCODE_DNE:           ConvertComparison(CmpInst::FCMP_UNE, CompType::getF64(), Inst); break;
+
+    case D3D11_SB_OPCODE_DMOV: {
+      // dmov dst, src: load the enabled components as f64 and store them back
+      // unchanged.
+      const unsigned uOpDst = 0;
+      const unsigned uOpSrc = 1;
+      CMask DstMask = CMask::FromDXBC(Inst.m_Operands[uOpDst].m_WriteMask);
+
+      OperandValue Moved;
+      LoadOperand(Moved, Inst, uOpSrc, DstMask, CompType::getF64());
+      StoreOperand(Moved, Inst, uOpDst, DstMask, CompType::getF64());
+      break;
+    }
+
+    case D3D11_SB_OPCODE_DMOVC: {
+      // dmovc dst, cond, a, b: per-double select between a and b.
+      const unsigned uOpDst = 0;
+      const unsigned uOpCond = 1;
+      const unsigned uOpTrue = 2;
+      const unsigned uOpFalse = 3;
+      CMask DstMask = CMask::FromDXBC(Inst.m_Operands[uOpDst].m_WriteMask);
+      CompType DoubleType = CompType::getF64();
+
+      OperandValue Cond, IfTrue, IfFalse, Selected;
+      // The condition is read as i1 from the first two components only.
+      LoadOperand(Cond, Inst, uOpCond, CMask(1, 1, 0, 0), CompType::getI1());
+      LoadOperand(IfTrue, Inst, uOpTrue, DstMask, DoubleType);
+      LoadOperand(IfFalse, Inst, uOpFalse, DstMask, DoubleType);
+
+      // The loop steps by two components; the value at component c is paired
+      // with condition component c / 2.
+      for (BYTE c = 0; c < DXBC::kWidth; c += 2) {
+        if (DstMask.IsSet(c)) {
+          Selected[c] = m_pBuilder->CreateSelect(Cond[c >> 1], IfTrue[c], IfFalse[c]);
+        }
+      }
+
+      StoreOperand(Selected, Inst, uOpDst, DstMask, DoubleType);
+      break;
+    }
+
+    case D3D11_1_SB_OPCODE_DRCP: {
+      // drcp dst, src: double-precision reciprocal, emitted as the
+      // floating-point division 1.0 / src.
+      const unsigned uOpDst = 0;
+      const unsigned uOpSrc = 1;
+      CMask DstMask = CMask::FromDXBC(Inst.m_Operands[uOpDst].m_WriteMask);
+      CompType DoubleType = CompType::getF64();
+
+      OperandValue Src, Quotient;
+      LoadOperand(Src, Inst, uOpSrc, DstMask, DoubleType);
+
+      // Only every second component is visited (the loop steps by two).
+      for (BYTE c = 0; c < DXBC::kWidth; c += 2) {
+        if (DstMask.IsSet(c)) {
+          Value *pOne = m_pOP->GetDoubleConst(1.0);
+          Quotient[c] = m_pBuilder->CreateBinOp(Instruction::BinaryOps::FDiv, pOne, Src[c]);
+        }
+      }
+
+      StoreOperand(Quotient, Inst, uOpDst, DstMask, DoubleType);
+      break;
+    }
+
+    case D3D11_SB_OPCODE_DTOF:      ConvertFromDouble(CompType::getF32(), Inst); break;
+    case D3D11_1_SB_OPCODE_DTOI:    ConvertFromDouble(CompType::getI32(), Inst); break;
+    case D3D11_1_SB_OPCODE_DTOU:    ConvertFromDouble(CompType::getU32(), Inst); break;
+    case D3D11_SB_OPCODE_FTOD:      ConvertToDouble  (CompType::getF32(), Inst); break;
+    case D3D11_1_SB_OPCODE_ITOD:    ConvertToDouble  (CompType::getI32(), Inst); break;
+    case D3D11_1_SB_OPCODE_UTOD:    ConvertToDouble  (CompType::getU32(), Inst); break;
+
+      //
+      // Resource operations.
+      //
+    case D3D10_SB_OPCODE_SAMPLE:
+    case D3DWDDM1_3_SB_OPCODE_SAMPLE_CLAMP_FEEDBACK: {
+      // Plain sample; both encodings become one DXIL Sample call with 11
+      // arguments (opcode, shared sample inputs, clamp).
+      const OP::OpCode DxilOp = OP::OpCode::Sample;
+      const bool bFeedback = DXBC::HasFeedback(Inst.OpCode());
+      const unsigned uDstIdx = 0;
+      // The clamp operand sits one position later in the feedback encoding.
+      const unsigned uClampIdx = bFeedback ? 5 : 4;
+      const unsigned kArgOpCode = 0;
+      const unsigned kArgClamp = 10;
+      Value *CallArgs[11];
+
+      // Shared sample inputs are written by the helper (defined outside this
+      // excerpt); the opcode slot is set afterwards.
+      LoadCommonSampleInputs(Inst, &CallArgs[0]);
+      CallArgs[kArgOpCode] = m_pOP->GetU32Const((unsigned)DxilOp);
+
+      // Clamp defaults to 0.0. Only the feedback encoding can override it, and
+      // only when its clamp operand is something other than an immediate 0.
+      CallArgs[kArgClamp] = m_pOP->GetFloatConst(0.f);
+      if (bFeedback) {
+        const bool bImmediateZero =
+            Inst.m_Operands[uClampIdx].m_Type == D3D10_SB_OPERAND_TYPE_IMMEDIATE32 &&
+            Inst.m_Operands[uClampIdx].m_Valuef[0] == 0.f;
+        if (!bImmediateZero) {
+          OperandValue Clamp;
+          LoadOperand(Clamp, Inst, uClampIdx, CMask::MakeXMask(), CompType::getF32());
+          CallArgs[kArgClamp] = Clamp[0];
+        }
+      }
+
+      // Emit the call, overloaded on the destination's component type.
+      CompType RetType = DXBC::GetCompTypeWithMinPrec(CompType::getF32(), Inst.m_Operands[uDstIdx].m_MinPrecision);
+      Function *pSampleFunc = m_pOP->GetOpFunc(DxilOp, RetType.GetLLVMType(m_Ctx));
+      Value *pResRet = m_pBuilder->CreateCall(pSampleFunc, CallArgs);
+
+      StoreResRetOutputAndStatus(Inst, pResRet, RetType);
+      break;
+    }
+
+    case D3D10_SB_OPCODE_SAMPLE_B:
+    case D3DWDDM1_3_SB_OPCODE_SAMPLE_B_CLAMP_FEEDBACK: {
+      // Biased sample; both encodings become one DXIL SampleBias call with 12
+      // arguments (opcode, shared sample inputs, bias, clamp).
+      const OP::OpCode DxilOp = OP::OpCode::SampleBias;
+      const bool bFeedback = DXBC::HasFeedback(Inst.OpCode());
+      const unsigned uDstIdx = 0;
+      // Bias and clamp operands sit one position later in the feedback encoding.
+      const unsigned uBiasIdx = bFeedback ? 5 : 4;
+      const unsigned uClampIdx = uBiasIdx + 1;
+      const unsigned kArgOpCode = 0;
+      const unsigned kArgBias = 10;
+      const unsigned kArgClamp = 11;
+      Value *CallArgs[12];
+
+      // Shared sample inputs are written by the helper (defined outside this
+      // excerpt); the opcode slot is set afterwards.
+      LoadCommonSampleInputs(Inst, &CallArgs[0]);
+      CallArgs[kArgOpCode] = m_pOP->GetU32Const((unsigned)DxilOp);
+
+      // Bias: x component of the bias operand.
+      OperandValue Bias;
+      LoadOperand(Bias, Inst, uBiasIdx, CMask::MakeXMask(), CompType::getF32());
+      CallArgs[kArgBias] = Bias[0];
+
+      // Clamp defaults to 0.0. Only the feedback encoding can override it, and
+      // only when its clamp operand is something other than an immediate 0.
+      CallArgs[kArgClamp] = m_pOP->GetFloatConst(0.f);
+      if (bFeedback) {
+        const bool bImmediateZero =
+            Inst.m_Operands[uClampIdx].m_Type == D3D10_SB_OPERAND_TYPE_IMMEDIATE32 &&
+            Inst.m_Operands[uClampIdx].m_Valuef[0] == 0.f;
+        if (!bImmediateZero) {
+          OperandValue Clamp;
+          LoadOperand(Clamp, Inst, uClampIdx, CMask::MakeXMask(), CompType::getF32());
+          CallArgs[kArgClamp] = Clamp[0];
+        }
+      }
+
+      // Emit the call, overloaded on the destination's component type.
+      CompType RetType = DXBC::GetCompTypeWithMinPrec(CompType::getF32(), Inst.m_Operands[uDstIdx].m_MinPrecision);
+      Function *pSampleFunc = m_pOP->GetOpFunc(DxilOp, RetType.GetLLVMType(m_Ctx));
+      Value *pResRet = m_pBuilder->CreateCall(pSampleFunc, CallArgs);
+
+      StoreResRetOutputAndStatus(Inst, pResRet, RetType);
+      break;
+    }
+
+    case D3D10_SB_OPCODE_SAMPLE_L:
+    case D3DWDDM1_3_SB_OPCODE_SAMPLE_L_FEEDBACK: {
+      // Sample at an explicit level; both encodings become one DXIL
+      // SampleLevel call with 11 arguments (opcode, shared sample inputs, level).
+      const OP::OpCode DxilOp = OP::OpCode::SampleLevel;
+      const bool bFeedback = DXBC::HasFeedback(Inst.OpCode());
+      const unsigned uDstIdx = 0;
+      // The level operand sits one position later in the feedback encoding.
+      const unsigned uLevelIdx = bFeedback ? 5 : 4;
+      const unsigned kArgOpCode = 0;
+      const unsigned kArgLevel = 10;
+      Value *CallArgs[11];
+
+      // Shared sample inputs are written by the helper (defined outside this
+      // excerpt); the opcode slot is set afterwards.
+      LoadCommonSampleInputs(Inst, &CallArgs[0]);
+      CallArgs[kArgOpCode] = m_pOP->GetU32Const((unsigned)DxilOp);
+
+      // Level: x component of the level operand.
+      OperandValue Level;
+      LoadOperand(Level, Inst, uLevelIdx, CMask::MakeXMask(), CompType::getF32());
+      CallArgs[kArgLevel] = Level[0];
+
+      // Emit the call, overloaded on the destination's component type.
+      CompType RetType = DXBC::GetCompTypeWithMinPrec(CompType::getF32(), Inst.m_Operands[uDstIdx].m_MinPrecision);
+      Function *pSampleFunc = m_pOP->GetOpFunc(DxilOp, RetType.GetLLVMType(m_Ctx));
+      Value *pResRet = m_pBuilder->CreateCall(pSampleFunc, CallArgs);
+
+      StoreResRetOutputAndStatus(Inst, pResRet, RetType);
+      break;
+    }
+
+    case D3D10_SB_OPCODE_SAMPLE_D:
+    case D3DWDDM1_3_SB_OPCODE_SAMPLE_D_CLAMP_FEEDBACK: {
+      // Sample with explicit derivatives; both encodings become one DXIL
+      // SampleGrad call with 17 arguments (opcode, shared sample inputs,
+      // three ddx slots, three ddy slots, clamp).
+      const OP::OpCode DxilOp = OP::OpCode::SampleGrad;
+      const bool bFeedback = DXBC::HasFeedback(Inst.OpCode());
+      const unsigned uDstIdx = 0;
+      const unsigned uSrvIdx = DXBC::GetResourceSlot(Inst.OpCode());
+      // Derivative and clamp operands sit one position later in the feedback
+      // encoding.
+      const unsigned uDdxIdx = bFeedback ? 5 : 4;
+      const unsigned uDdyIdx = uDdxIdx + 1;
+      const unsigned uClampIdx = uDdyIdx + 1;
+      const DxilResource &Res = GetSRVFromOperand(Inst, uSrvIdx);
+      const unsigned kArgOpCode = 0;
+      const unsigned kArgDdx = 10;
+      const unsigned kArgDdy = 13;
+      const unsigned kArgClamp = 16;
+      Value *CallArgs[17];
+
+      // Shared sample inputs are written by the helper (defined outside this
+      // excerpt); the opcode slot is set afterwards.
+      LoadCommonSampleInputs(Inst, &CallArgs[0]);
+      CallArgs[kArgOpCode] = m_pOP->GetU32Const((unsigned)DxilOp);
+
+      // Derivatives: as many leading components as the resource kind has
+      // coordinates; the remaining slots get the unused-f32 placeholder.
+      CMask GradMask = CMask::MakeFirstNCompMask(DXBC::GetNumResCoords(Res.GetKind()));
+      OperandValue Ddx, Ddy;
+      LoadOperand(Ddx, Inst, uDdxIdx, GradMask, CompType::getF32());
+      for (BYTE c = 0; c < 3; c++) {
+        CallArgs[kArgDdx + c] = GradMask.IsSet(c) ? Ddx[c] : m_pUnusedF32;
+      }
+      LoadOperand(Ddy, Inst, uDdyIdx, GradMask, CompType::getF32());
+      for (BYTE c = 0; c < 3; c++) {
+        CallArgs[kArgDdy + c] = GradMask.IsSet(c) ? Ddy[c] : m_pUnusedF32;
+      }
+
+      // Clamp defaults to 0.0. Only the feedback encoding can override it, and
+      // only when its clamp operand is something other than an immediate 0.
+      CallArgs[kArgClamp] = m_pOP->GetFloatConst(0.f);
+      if (bFeedback) {
+        const bool bImmediateZero =
+            Inst.m_Operands[uClampIdx].m_Type == D3D10_SB_OPERAND_TYPE_IMMEDIATE32 &&
+            Inst.m_Operands[uClampIdx].m_Valuef[0] == 0.f;
+        if (!bImmediateZero) {
+          OperandValue Clamp;
+          LoadOperand(Clamp, Inst, uClampIdx, CMask::MakeXMask(), CompType::getF32());
+          CallArgs[kArgClamp] = Clamp[0];
+        }
+      }
+
+      // Emit the call, overloaded on the destination's component type.
+      CompType RetType = DXBC::GetCompTypeWithMinPrec(CompType::getF32(), Inst.m_Operands[uDstIdx].m_MinPrecision);
+      Function *pSampleFunc = m_pOP->GetOpFunc(DxilOp, RetType.GetLLVMType(m_Ctx));
+      Value *pResRet = m_pBuilder->CreateCall(pSampleFunc, CallArgs);
+
+      StoreResRetOutputAndStatus(Inst, pResRet, RetType);
+      break;
+    }
+
+    case D3D10_SB_OPCODE_SAMPLE_C:
+    case D3DWDDM1_3_SB_OPCODE_SAMPLE_C_CLAMP_FEEDBACK: {
+      // Comparison sample; both encodings become one DXIL SampleCmp call with
+      // 12 arguments (opcode, shared sample inputs, compare value, clamp).
+      const OP::OpCode DxilOp = OP::OpCode::SampleCmp;
+      const bool bFeedback = DXBC::HasFeedback(Inst.OpCode());
+      const unsigned uDstIdx = 0;
+      // Compare and clamp operands sit one position later in the feedback
+      // encoding.
+      const unsigned uCmpIdx = bFeedback ? 5 : 4;
+      const unsigned uClampIdx = uCmpIdx + 1;
+      const unsigned kArgOpCode = 0;
+      const unsigned kArgCmp = 10;
+      const unsigned kArgClamp = 11;
+      Value *CallArgs[12];
+
+      // Shared sample inputs are written by the helper (defined outside this
+      // excerpt); the opcode slot is set afterwards.
+      LoadCommonSampleInputs(Inst, &CallArgs[0]);
+      CallArgs[kArgOpCode] = m_pOP->GetU32Const((unsigned)DxilOp);
+
+      // Compare value: x component of the compare operand.
+      OperandValue CmpValue;
+      LoadOperand(CmpValue, Inst, uCmpIdx, CMask::MakeXMask(), CompType::getF32());
+      CallArgs[kArgCmp] = CmpValue[0];
+
+      // Clamp defaults to 0.0. Only the feedback encoding can override it, and
+      // only when its clamp operand is something other than an immediate 0.
+      CallArgs[kArgClamp] = m_pOP->GetFloatConst(0.f);
+      if (bFeedback) {
+        const bool bImmediateZero =
+            Inst.m_Operands[uClampIdx].m_Type == D3D10_SB_OPERAND_TYPE_IMMEDIATE32 &&
+            Inst.m_Operands[uClampIdx].m_Valuef[0] == 0.f;
+        if (!bImmediateZero) {
+          OperandValue Clamp;
+          LoadOperand(Clamp, Inst, uClampIdx, CMask::MakeXMask(), CompType::getF32());
+          CallArgs[kArgClamp] = Clamp[0];
+        }
+      }
+
+      // Emit the call, overloaded on the destination's component type.
+      CompType RetType = DXBC::GetCompTypeWithMinPrec(CompType::getF32(), Inst.m_Operands[uDstIdx].m_MinPrecision);
+      Function *pSampleFunc = m_pOP->GetOpFunc(DxilOp, RetType.GetLLVMType(m_Ctx));
+      Value *pResRet = m_pBuilder->CreateCall(pSampleFunc, CallArgs);
+
+      StoreResRetOutputAndStatus(Inst, pResRet, RetType);
+      break;
+    }
+
+    case D3D10_SB_OPCODE_SAMPLE_C_LZ:
+    case D3DWDDM1_3_SB_OPCODE_SAMPLE_C_LZ_FEEDBACK: {
+      // Comparison sample at level zero; both encodings become one DXIL
+      // SampleCmpLevelZero call with 11 arguments (opcode, shared sample
+      // inputs, compare value).
+      const OP::OpCode DxilOp = OP::OpCode::SampleCmpLevelZero;
+      const bool bFeedback = DXBC::HasFeedback(Inst.OpCode());
+      const unsigned uDstIdx = 0;
+      // The compare operand sits one position later in the feedback encoding.
+      const unsigned uCmpIdx = bFeedback ? 5 : 4;
+      const unsigned kArgOpCode = 0;
+      const unsigned kArgCmp = 10;
+      Value *CallArgs[11];
+
+      // Shared sample inputs are written by the helper (defined outside this
+      // excerpt); the opcode slot is set afterwards.
+      LoadCommonSampleInputs(Inst, &CallArgs[0]);
+      CallArgs[kArgOpCode] = m_pOP->GetU32Const((unsigned)DxilOp);
+
+      // Compare value: x component of the compare operand.
+      OperandValue CmpValue;
+      LoadOperand(CmpValue, Inst, uCmpIdx, CMask::MakeXMask(), CompType::getF32());
+      CallArgs[kArgCmp] = CmpValue[0];
+
+      // Emit the call, overloaded on the destination's component type.
+      CompType RetType = DXBC::GetCompTypeWithMinPrec(CompType::getF32(), Inst.m_Operands[uDstIdx].m_MinPrecision);
+      Function *pSampleFunc = m_pOP->GetOpFunc(DxilOp, RetType.GetLLVMType(m_Ctx));
+      Value *pResRet = m_pBuilder->CreateCall(pSampleFunc, CallArgs);
+
+      StoreResRetOutputAndStatus(Inst, pResRet, RetType);
+      break;
+    }
+
+    case D3D10_SB_OPCODE_LD:
+    case D3D10_SB_OPCODE_LD_MS:
+    case D3DWDDM1_3_SB_OPCODE_LD_FEEDBACK:
+    case D3DWDDM1_3_SB_OPCODE_LD_MS_FEEDBACK: { // SRV load: emits TextureLoad, or BufferLoad when the resource kind is TypedBuffer.
+      bool bIsTexture2DMS = Inst.OpCode() == D3D10_SB_OPCODE_LD_MS || 
+                            Inst.OpCode() == D3DWDDM1_3_SB_OPCODE_LD_MS_FEEDBACK; // True for both ld_ms encodings.
+      bool bHasFeedback = DXBC::HasFeedback(Inst.OpCode());
+      const unsigned uOpOutput = 0;
+      const unsigned uOpStatus = 1;
+      const unsigned uOpCoord = uOpStatus + (bHasFeedback ? 1 : 0); // Coord is operand 1, or operand 2 in the feedback encodings.
+      const unsigned uOpRes = uOpCoord + 1;
+      const unsigned uOpSampleCount = uOpRes + 1; // Only read on the ld_ms path below.
+      DXASSERT_DXBC(Inst.m_Operands[uOpRes].m_Type == D3D10_SB_OPERAND_TYPE_RESOURCE);
+
+      // Resource: InSRV[0] is used as the handle argument of the call below.
+      OperandValue InSRV;
+      const DxilResource &R = LoadSRVOperand(InSRV, Inst, uOpRes, CMask::MakeXMask(), CompType::getInvalid());
+
+      // Return type: the resource's base component type, adjusted by the destination's min-precision.
+      CompType DstType = DXBC::GetCompTypeWithMinPrec(R.GetCompType().GetBaseCompType(), Inst.m_Operands[uOpOutput].m_MinPrecision);
+      Type *pDstType = DstType.GetLLVMType(m_Ctx);
+
+      // Create Load call (9-argument TextureLoad or 4-argument BufferLoad).
+      Value *pOpRet;
+      if (R.GetKind() != DxilResource::Kind::TypedBuffer) {
+        OP::OpCode OpCode = OP::OpCode::TextureLoad;
+
+        // Coordinates: one component per resource coordinate.
+        OperandValue InCoord;
+        CMask CoordMask = CMask::MakeFirstNCompMask(DXBC::GetNumResCoords(R.GetKind()));
+        // MIP level: for non-MS loads component 3 of the coordinate operand is loaded as well.
+        if (!bIsTexture2DMS) {
+          CoordMask.Set(3);
+        }
+        LoadOperand(InCoord, Inst, uOpCoord, CoordMask, CompType::getI32());
+
+        Value *Args[9];
+        Args[0] = m_pOP->GetU32Const((unsigned)OpCode);             // OpCode
+        Args[1] = InSRV[0];                                         // Texture SRV handle
+        if (!bIsTexture2DMS) {
+          Args[2] = InCoord[3];                                     // MIP level
+        } else {
+          BYTE Comp = Inst.m_Operands[uOpSampleCount].m_ComponentName; // Component selected by the operand.
+          OperandValue InSampleCount;
+          LoadOperand(InSampleCount, Inst, uOpSampleCount, CMask::MakeCompMask(Comp), CompType::getI32());
+          Args[2] = InSampleCount[Comp];                            // Sample count (NOTE(review): for ld_ms this is presumably the sample index - confirm naming)
+        }
+        // Coordinates; slots outside CoordMask get the unused-i32 placeholder.
+        Args[3] = CoordMask.IsSet(0) ? InCoord[0] : m_pUnusedI32;   // Coordinate 0
+        Args[4] = CoordMask.IsSet(1) ? InCoord[1] : m_pUnusedI32;   // Coordinate 1
+        Args[5] = CoordMask.IsSet(2) ? InCoord[2] : m_pUnusedI32;   // Coordinate 2
+        // Offsets: immediate texel offsets stored on the instruction, one per resource offset dimension.
+        CMask OffsetMask = CMask::MakeFirstNCompMask(DXBC::GetNumResOffsets(R.GetKind()));
+        Args[6] = OffsetMask.IsSet(0) ? m_pOP->GetU32Const(Inst.m_TexelOffset[0]) : m_pUnusedI32; // Offset 0
+        Args[7] = OffsetMask.IsSet(1) ? m_pOP->GetU32Const(Inst.m_TexelOffset[1]) : m_pUnusedI32; // Offset 1
+        Args[8] = OffsetMask.IsSet(2) ? m_pOP->GetU32Const(Inst.m_TexelOffset[2]) : m_pUnusedI32; // Offset 2
+
+        Function *F = m_pOP->GetOpFunc(OpCode, pDstType);
+        pOpRet = m_pBuilder->CreateCall(F, Args);
+      } else {
+        // R.GetKind() == DxilResource::TypedBuffer
+        OP::OpCode OpCode = OP::OpCode::BufferLoad;
+
+        Value *Args[4];
+        Args[0] = m_pOP->GetU32Const((unsigned)OpCode);     // OpCode
+        Args[1] = InSRV[0];                                 // Buffer SRV handle
+        Args[2] = GetCoordValue(Inst, uOpCoord);            // Coord 0: in elements
+        Args[3] = m_pUnusedI32;                             // Coord 1: unused
+
+        Function *F = m_pOP->GetOpFunc(OpCode, pDstType);
+        pOpRet = m_pBuilder->CreateCall(F, Args);
+      }
+
+      StoreResRetOutputAndStatus(Inst, pOpRet, DstType); // Writes the call result to the destination (and presumably the status operand of feedback encodings - helper is outside this excerpt).
+      break;
+    }
+
+    case D3D11_SB_OPCODE_LD_UAV_TYPED:
+    case D3DWDDM1_3_SB_OPCODE_LD_UAV_TYPED_FEEDBACK: { // Typed UAV load: emits TextureLoad, or BufferLoad when the resource kind is TypedBuffer.
+      bool bHasStatus = DXBC::HasFeedback(Inst.OpCode()); // Same query the other load cases store as bHasFeedback.
+      const unsigned uOpOutput = 0;
+      const unsigned uOpStatus = 1;
+      const unsigned uOpCoord = uOpStatus + (bHasStatus ? 1 : 0); // Coord is operand 1, or operand 2 in the feedback encoding.
+      const unsigned uOpUAV = uOpCoord + 1;
+      DXASSERT_DXBC(Inst.m_Operands[uOpUAV].m_Type == D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW);
+      const DxilResource &R = m_pPR->GetUAV(m_UAVRangeMap[Inst.m_Operands[uOpUAV].m_Index[0].m_RegIndex]); // UAV record looked up through the operand's first register index.
+
+      // Resource: InUAV[0] is used as the handle argument of the call below.
+      OperandValue InUAV;
+      LoadOperand(InUAV, Inst, uOpUAV, CMask::MakeXMask(), CompType::getInvalid());
+
+      // Return type: the resource's base component type, adjusted by the destination's min-precision.
+      CompType DstType = DXBC::GetCompTypeWithMinPrec(R.GetCompType().GetBaseCompType(), Inst.m_Operands[uOpOutput].m_MinPrecision);
+      Type *pDstType = DstType.GetLLVMType(m_Ctx);
+
+      // Create Load call (9-argument TextureLoad or 4-argument BufferLoad).
+      Value *pOpRet;
+      if (R.GetKind() != DxilResource::Kind::TypedBuffer) {
+        OP::OpCode OpCode = OP::OpCode::TextureLoad;
+
+        // Coordinates: one component per resource coordinate.
+        OperandValue InCoord;
+        CMask CoordMask = CMask::MakeFirstNCompMask(DXBC::GetNumResCoords(R.GetKind()));
+        LoadOperand(InCoord, Inst, uOpCoord, CoordMask, CompType::getI32());
+
+        Value *Args[9];
+        Args[0] = m_pOP->GetU32Const((unsigned)OpCode);           // OpCode
+        Args[1] = InUAV[0];                                       // RWTexture UAV handle
+        Args[2] = m_pUnusedI32;                                   // MIP level: not supplied for UAV loads.
+        // Coordinates; slots outside CoordMask get the unused-i32 placeholder.
+        Args[3] = CoordMask.IsSet(0) ? InCoord[0] : m_pUnusedI32;  // Coordinate 0
+        Args[4] = CoordMask.IsSet(1) ? InCoord[1] : m_pUnusedI32;  // Coordinate 1
+        Args[5] = CoordMask.IsSet(2) ? InCoord[2] : m_pUnusedI32;  // Coordinate 2
+        // Offsets: none are passed on this path.
+        Args[6] = m_pUnusedI32;                                    // Offset 0
+        Args[7] = m_pUnusedI32;                                    // Offset 1
+        Args[8] = m_pUnusedI32;                                    // Offset 2
+
+        Function *F = m_pOP->GetOpFunc(OpCode, pDstType);
+        pOpRet = m_pBuilder->CreateCall(F, Args);
+      } else {
+        // R.GetKind() == DxilResource::TypedBuffer
+        OP::OpCode OpCode = OP::OpCode::BufferLoad;
+
+        Value *Args[4];
+        Args[0] = m_pOP->GetU32Const((unsigned)OpCode);     // OpCode
+        Args[1] = InUAV[0];                                 // RWBuffer UAV handle
+        Args[2] = GetCoordValue(Inst, uOpCoord);            // Coord 0: in elements
+        Args[3] = m_pUnusedI32;                             // Coord 1: unused
+
+        Function *F = m_pOP->GetOpFunc(OpCode, pDstType);
+        pOpRet = m_pBuilder->CreateCall(F, Args);
+      }
+
+      StoreResRetOutputAndStatus(Inst, pOpRet, DstType); // Writes the call result to the destination (and presumably the status operand of the feedback encoding - helper is outside this excerpt).
+      break;
+    }
+
+    case D3D11_SB_OPCODE_STORE_UAV_TYPED: { // Typed UAV store: emits TextureStore, or BufferStore when the resource kind is TypedBuffer.
+      const unsigned uOpUAV = 0;
+      const unsigned uOpCoord = uOpUAV + 1;
+      const unsigned uOpValue = uOpCoord + 1;
+      DXASSERT_DXBC(Inst.m_Operands[uOpUAV].m_Type == D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW);
+      const DxilResource &R = m_pPR->GetUAV(m_UAVRangeMap[Inst.m_Operands[uOpUAV].m_Index[0].m_RegIndex]); // UAV record looked up through the operand's first register index.
+      OperandValue InUAV, InCoord, InValue;
+
+      // Resource: InUAV[0] is used as the handle argument of the call below.
+      LoadOperand(InUAV, Inst, uOpUAV, CMask::MakeXMask(), CompType::getInvalid());
+
+      // Coordinates: one component per resource coordinate.
+      CMask CoordMask = CMask::MakeFirstNCompMask(DXBC::GetNumResCoords(R.GetKind()));
+      LoadOperand(InCoord, Inst, uOpCoord, CoordMask, CompType::getI32());
+
+      // Value type: the resource's base component type, adjusted by the UAV operand's min-precision.
+      CompType ValueType = DXBC::GetCompTypeWithMinPrec(R.GetCompType().GetBaseCompType(), Inst.m_Operands[uOpUAV].m_MinPrecision);
+      Type *pValueType = ValueType.GetLLVMType(m_Ctx);
+
+      // Value: the components to store are selected by the UAV (destination) operand's write mask.
+      CMask ValueMask = CMask::FromDXBC(Inst.m_Operands[uOpUAV].m_WriteMask);
+      LoadOperand(InValue, Inst, uOpValue, ValueMask, ValueType);
+
+      // Create Store call (10-argument TextureStore or 9-argument BufferStore).
+      if (R.GetKind() != DxilResource::Kind::TypedBuffer) {
+        OP::OpCode OpCode = OP::OpCode::TextureStore;
+
+        Value *Args[10];
+        Args[0] = m_pOP->GetU32Const((unsigned)OpCode);           // OpCode
+        Args[1] = InUAV[0];                                       // RWTexture UAV handle
+        // Coordinates; slots outside CoordMask get the unused-i32 placeholder.
+        Args[2] = CoordMask.IsSet(0) ? InCoord[0] : m_pUnusedI32; // Coordinate 0
+        Args[3] = CoordMask.IsSet(1) ? InCoord[1] : m_pUnusedI32; // Coordinate 1
+        Args[4] = CoordMask.IsSet(2) ? InCoord[2] : m_pUnusedI32; // Coordinate 2
+        // Value. NOTE(review): masked-off lanes use m_pUnusedI32 regardless of pValueType - confirm the mask is always full for non-i32 value types.
+        Args[5] = ValueMask.IsSet(0) ? InValue[0] : m_pUnusedI32; // Value 0
+        Args[6] = ValueMask.IsSet(1) ? InValue[1] : m_pUnusedI32; // Value 1
+        Args[7] = ValueMask.IsSet(2) ? InValue[2] : m_pUnusedI32; // Value 2
+        Args[8] = ValueMask.IsSet(3) ? InValue[3] : m_pUnusedI32; // Value 3
+        Args[9] = m_pOP->GetU8Const(ValueMask.ToByte());          // Value mask
+
+        Function *F = m_pOP->GetOpFunc(OpCode, pValueType);
+        MarkPrecise(m_pBuilder->CreateCall(F, Args));
+      } else {
+        // R.GetKind() == DxilResource::TypedBuffer
+        OP::OpCode OpCode = OP::OpCode::BufferStore;
+
+        Value *Args[9];
+        Args[0] = m_pOP->GetU32Const((unsigned)OpCode);           // OpCode
+        Args[1] = InUAV[0];                                       // RWBuffer UAV handle
+        Args[2] = InCoord[0];                                     // Coord 0: in elements (taken from the loaded operand; the typed load cases use GetCoordValue instead)
+        Args[3] = m_pUnusedI32;                                   // Coord 1: unused
+        Args[4] = ValueMask.IsSet(0) ? InValue[0] : m_pUnusedI32; // Value 0 (same NOTE(review) about the i32 placeholder as in the texture path)
+        Args[5] = ValueMask.IsSet(1) ? InValue[1] : m_pUnusedI32; // Value 1
+        Args[6] = ValueMask.IsSet(2) ? InValue[2] : m_pUnusedI32; // Value 2
+        Args[7] = ValueMask.IsSet(3) ? InValue[3] : m_pUnusedI32; // Value 3
+        Args[8] = m_pOP->GetU8Const(ValueMask.ToByte());          // Value mask
+
+        Function *F = m_pOP->GetOpFunc(OpCode, pValueType);
+        MarkPrecise(m_pBuilder->CreateCall(F, Args));
+      }
+      break;
+    }
+
+    case D3D11_SB_OPCODE_LD_RAW:
+    case D3DWDDM1_3_SB_OPCODE_LD_RAW_FEEDBACK: {
+      // Raw (byte-offset addressed) load from either a buffer resource or
+      // thread-group shared memory.
+      // Operand layout: [0] output, [1] status (feedback variant only),
+      // followed by the byte offset and then the resource/TGSM operand.
+      bool bHasFeedback = DXBC::HasFeedback(Inst.OpCode());
+      const unsigned uOpOutput = 0;
+      const unsigned uOpStatus = 1;
+      const unsigned uOpByteOffset = uOpStatus + (bHasFeedback ? 1 : 0);
+      const unsigned uOpRes = uOpByteOffset + 1;
+      // Byte offset (shared by both the resource and the TGSM path).
+      Value *pByteOffset = GetCoordValue(Inst, uOpByteOffset);
+
+      if (Inst.m_Operands[uOpRes].m_Type != D3D11_SB_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) {
+        OP::OpCode OpCode = OP::OpCode::BufferLoad;
+        OperandValue InRes;
+
+        // Resource.
+        LoadOperand(InRes, Inst, uOpRes, CMask::MakeXMask(), CompType::getInvalid());
+
+        // Create Load call.
+        Value *Args[4];
+        Args[0] = m_pOP->GetU32Const((unsigned)OpCode);       // OpCode
+        Args[1] = InRes[0];                                   // [RW]ByteAddressBuffer UAV/SRV handle
+        Args[2] = pByteOffset;                                // Coord 0: in bytes
+        Args[3] = m_pUnusedI32;                               // Coord 1: unused
+
+        // Raw loads are always issued with an i32 overload.
+        CompType DstType = CompType::getI32();
+        Type *pDstType = DstType.GetLLVMType(m_Ctx);
+        Function *F = m_pOP->GetOpFunc(OpCode, pDstType);
+        Value *pOpRet = m_pBuilder->CreateCall(F, Args);
+
+        StoreResRetOutputAndStatus(Inst, pOpRet, DstType);
+      } else {
+        // Thread-group shared memory: delegate to the TGSM load helper.
+        const unsigned uOpTGSM = uOpRes;
+        CompType SrcType = CompType::getI32();
+        ConvertLoadTGSM(Inst, uOpTGSM, uOpOutput, SrcType, pByteOffset);
+      }
+
+      break;
+    }
+
+    case D3D11_SB_OPCODE_STORE_RAW: {
+      // Raw (byte-offset addressed) store to either a UAV or thread-group
+      // shared memory.
+      // Operand layout: [0] destination (UAV or TGSM), [1] byte offset,
+      // [2] value.
+      const unsigned uOpRes = 0;
+      const unsigned uOpByteOffset = uOpRes + 1;
+      const unsigned uOpValue = uOpByteOffset + 1;
+      // Byte offset (shared by both the UAV and the TGSM path).
+      Value *pByteOffset = GetCoordValue(Inst, uOpByteOffset);
+
+      if (Inst.m_Operands[uOpRes].m_Type == D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW) {
+        const unsigned uOpUAV = uOpRes;
+        OP::OpCode OpCode = OP::OpCode::BufferStore;
+        DXASSERT_DXBC(Inst.m_Operands[uOpUAV].m_Type == D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW);
+        OperandValue InUAV, InValue;
+
+        // Resource.
+        LoadOperand(InUAV, Inst, uOpUAV, CMask::MakeXMask(), CompType::getInvalid());
+
+        // Value type: raw stores are always issued with an i32 overload.
+        CompType ValueType = CompType::getI32();
+        Type *pValueType = ValueType.GetLLVMType(m_Ctx);
+
+        // Value: the destination write mask selects which components are stored.
+        CMask ValueMask = CMask::FromDXBC(Inst.m_Operands[uOpUAV].m_WriteMask);
+        LoadOperand(InValue, Inst, uOpValue, ValueMask, ValueType);
+
+        // Create Store call.
+        Value *Args[9];
+        Args[0] = m_pOP->GetU32Const((unsigned)OpCode);           // OpCode
+        Args[1] = InUAV[0];                                       // RWByteAddressBuffer UAV handle
+        Args[2] = pByteOffset;                                    // Coord 0: in bytes
+        Args[3] = m_pUnusedI32;                                   // Coord 1: unused
+        Args[4] = ValueMask.IsSet(0) ? InValue[0] : m_pUnusedI32; // Value 0
+        Args[5] = ValueMask.IsSet(1) ? InValue[1] : m_pUnusedI32; // Value 1
+        Args[6] = ValueMask.IsSet(2) ? InValue[2] : m_pUnusedI32; // Value 2
+        Args[7] = ValueMask.IsSet(3) ? InValue[3] : m_pUnusedI32; // Value 3
+        Args[8] = m_pOP->GetU8Const(ValueMask.ToByte());          // Value mask
+
+        Function *F = m_pOP->GetOpFunc(OpCode, pValueType);
+        MarkPrecise(m_pBuilder->CreateCall(F, Args));
+      } else {
+        // Thread-group shared memory: delegate to the TGSM store helper.
+        const unsigned uOpTGSM = uOpRes;
+        CompType ValueType = CompType::getI32();
+        ConvertStoreTGSM(Inst, uOpTGSM, uOpValue, ValueType, pByteOffset);
+      }
+
+      break;
+    }
+
+    case D3D11_SB_OPCODE_LD_STRUCTURED:
+    case D3DWDDM1_3_SB_OPCODE_LD_STRUCTURED_FEEDBACK: {
+      // Structured load from either a buffer resource or thread-group shared
+      // memory.
+      // Operand layout: [0] output, [1] status (feedback variant only),
+      // followed by element index, byte offset within the element, and the
+      // resource/TGSM operand.
+      bool bHasFeedback = DXBC::HasFeedback(Inst.OpCode());
+      const unsigned uOpOutput = 0;
+      const unsigned uOpStatus = 1;
+      const unsigned uOpElementOffset = uOpStatus + (bHasFeedback ? 1 : 0);
+      const unsigned uOpStructByteOffset = uOpElementOffset + 1;
+      const unsigned uOpRes = uOpStructByteOffset + 1;
+
+      if (Inst.m_Operands[uOpRes].m_Type != D3D11_SB_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) {
+        OP::OpCode OpCode = OP::OpCode::BufferLoad;
+        OperandValue InRes;
+
+        // Resource.
+        LoadOperand(InRes, Inst, uOpRes, CMask::MakeXMask(), CompType::getInvalid());
+
+        // Create Load call.
+        Value *Args[4];
+        Args[0] = m_pOP->GetU32Const((unsigned)OpCode);     // OpCode
+        Args[1] = InRes[0];                                 // [RW]StructuredBuffer UAV/SRV handle
+        Args[2] = GetCoordValue(Inst, uOpElementOffset);    // Coord 0: element index
+        Args[3] = GetCoordValue(Inst, uOpStructByteOffset); // Coord 1: byte offset within the element
+
+        // The buffer load is always issued with an i32 overload.
+        CompType DstType = CompType::getI32();
+        Type *pDstType = DstType.GetLLVMType(m_Ctx);
+        Function *F = m_pOP->GetOpFunc(OpCode, pDstType);
+        Value *pOpRet = m_pBuilder->CreateCall(F, Args);
+
+        StoreResRetOutputAndStatus(Inst, pOpRet, DstType);
+      } else {
+        const unsigned uOpTGSM = uOpRes;
+        DXASSERT_DXBC(Inst.m_Operands[uOpTGSM].m_Type == D3D11_SB_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY);
+        const TGSMEntry &R = m_TGSMMap[Inst.m_Operands[uOpTGSM].m_Index[0].m_RegIndex];
+        CompType SrcType = CompType::getF32();
+
+        // Byte offset: computed from the element index, the in-element byte
+        // offset and the TGSM stride.
+        Value *pByteOffset = GetByteOffset(Inst, uOpElementOffset, uOpStructByteOffset, R.Stride);
+
+        ConvertLoadTGSM(Inst, uOpTGSM, uOpOutput, SrcType, pByteOffset);
+      }
+
+      break;
+    }
+
+    case D3D11_SB_OPCODE_STORE_STRUCTURED: {
+      // Structured store to either a UAV or thread-group shared memory.
+      // Operand layout: [0] destination (UAV or TGSM), [1] element index,
+      // [2] byte offset within the element, [3] value.
+      const unsigned uOpRes = 0;
+      const unsigned uOpElementOffset = uOpRes + 1;
+      const unsigned uOpStructByteOffset = uOpElementOffset + 1;
+      const unsigned uOpValue = uOpStructByteOffset + 1;
+
+      if (Inst.m_Operands[uOpRes].m_Type == D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW) {
+        OP::OpCode OpCode = OP::OpCode::BufferStore;
+        const unsigned uOpUAV = uOpRes;
+        DXASSERT_DXBC(Inst.m_Operands[uOpUAV].m_Type == D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW);
+        OperandValue InUAV, InValue;
+
+        // Resource.
+        LoadOperand(InUAV, Inst, uOpUAV, CMask::MakeXMask(), CompType::getInvalid());
+
+        // Value type: the buffer store is always issued with an i32 overload.
+        CompType ValueType = CompType::getI32();
+        Type *pValueType = ValueType.GetLLVMType(m_Ctx);
+
+        // Value: the destination write mask selects which components are stored.
+        CMask ValueMask = CMask::FromDXBC(Inst.m_Operands[uOpUAV].m_WriteMask);
+        LoadOperand(InValue, Inst, uOpValue, ValueMask, ValueType);
+
+        // Create Store call.
+        Value *Args[9];
+        Args[0] = m_pOP->GetU32Const((unsigned)OpCode);           // OpCode
+        Args[1] = InUAV[0];                                       // RWStructuredBuffer UAV handle
+        Args[2] = GetCoordValue(Inst, uOpElementOffset);          // Coord 0: element index
+        Args[3] = GetCoordValue(Inst, uOpStructByteOffset);       // Coord 1: byte offset within the element
+        Args[4] = ValueMask.IsSet(0) ? InValue[0] : m_pUnusedI32; // Value 0
+        Args[5] = ValueMask.IsSet(1) ? InValue[1] : m_pUnusedI32; // Value 1
+        Args[6] = ValueMask.IsSet(2) ? InValue[2] : m_pUnusedI32; // Value 2
+        Args[7] = ValueMask.IsSet(3) ? InValue[3] : m_pUnusedI32; // Value 3
+        Args[8] = m_pOP->GetU8Const(ValueMask.ToByte());          // Value mask
+
+        Function *F = m_pOP->GetOpFunc(OpCode, pValueType);
+        MarkPrecise(m_pBuilder->CreateCall(F, Args));
+      } else {
+        const unsigned uOpTGSM = uOpRes;
+        // Validate the operand kind before using its register index as a
+        // TGSM map key, mirroring the structured-load and atomic cases.
+        DXASSERT_DXBC(Inst.m_Operands[uOpTGSM].m_Type == D3D11_SB_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY);
+        const TGSMEntry &R = m_TGSMMap[Inst.m_Operands[uOpTGSM].m_Index[0].m_RegIndex];
+        CompType ValueType = CompType::getF32();
+
+        // Byte offset: computed from the element index, the in-element byte
+        // offset and the TGSM stride.
+        Value *pByteOffset = GetByteOffset(Inst, uOpElementOffset, uOpStructByteOffset, R.Stride);
+
+        ConvertStoreTGSM(Inst, uOpTGSM, uOpValue, ValueType, pByteOffset);
+      }
+
+      break;
+    }
+
+    //
+    // Atomic operations.
+    //
+    case D3D11_SB_OPCODE_ATOMIC_AND:
+    case D3D11_SB_OPCODE_ATOMIC_OR:
+    case D3D11_SB_OPCODE_ATOMIC_XOR:
+    case D3D11_SB_OPCODE_ATOMIC_IADD:
+    case D3D11_SB_OPCODE_ATOMIC_IMAX:
+    case D3D11_SB_OPCODE_ATOMIC_IMIN:
+    case D3D11_SB_OPCODE_ATOMIC_UMAX:
+    case D3D11_SB_OPCODE_ATOMIC_UMIN:
+    case D3D11_SB_OPCODE_IMM_ATOMIC_IADD:
+    case D3D11_SB_OPCODE_IMM_ATOMIC_AND:
+    case D3D11_SB_OPCODE_IMM_ATOMIC_OR:
+    case D3D11_SB_OPCODE_IMM_ATOMIC_XOR:
+    case D3D11_SB_OPCODE_IMM_ATOMIC_EXCH:
+    case D3D11_SB_OPCODE_IMM_ATOMIC_IMAX:
+    case D3D11_SB_OPCODE_IMM_ATOMIC_IMIN:
+    case D3D11_SB_OPCODE_IMM_ATOMIC_UMAX:
+    case D3D11_SB_OPCODE_IMM_ATOMIC_UMIN:
+    case D3D11_SB_OPCODE_ATOMIC_CMP_STORE:
+    case D3D11_SB_OPCODE_IMM_ATOMIC_CMP_EXCH: {
+      // Atomic binary ops and compare-exchange on a UAV or on thread-group
+      // shared memory.
+      // Operand layout: [0] return value (returning variants only), then the
+      // resource/TGSM operand, the coordinate/offset, the compare value
+      // (compare variants only) and finally the new value.
+      bool bHasReturn = DXBC::AtomicBinOpHasReturn(Inst.OpCode());
+      bool bHasCompare = DXBC::IsCompareExchAtomicBinOp(Inst.OpCode());
+      const unsigned uOpRes = bHasReturn ? 1 : 0;
+      const unsigned uOpCoord = uOpRes + 1;
+      const unsigned uOpCompareValue = uOpCoord + (bHasCompare ? 1 : 0);
+      const unsigned uOpValue = uOpCompareValue + 1;
+
+      if (Inst.m_Operands[uOpRes].m_Type == D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW) {
+        const unsigned uOpUAV = uOpRes;
+        const DxilResource &R = m_pPR->GetUAV(m_UAVRangeMap[Inst.m_Operands[uOpUAV].m_Index[0].m_RegIndex]);
+        OperandValue InUAV, InCoord, InCompareValue, InValue;
+
+        // Resource.
+        LoadOperand(InUAV, Inst, uOpUAV, CMask::MakeXMask(), CompType::getInvalid());
+
+        // Coordinates: as many components as the resource kind requires;
+        // the remaining offset slots are filled with the unused-i32 value.
+        CMask CoordMask = CMask::MakeFirstNCompMask(DxilResource::GetNumCoords(R.GetKind()));
+        LoadOperand(InCoord, Inst, uOpCoord, CoordMask, CompType::getI32());
+        Value *pOffset[3];
+        pOffset[0] = InCoord[0];
+        pOffset[1] = CoordMask.IsSet(1) ? InCoord[1] : m_pUnusedI32;
+        pOffset[2] = CoordMask.IsSet(2) ? InCoord[2] : m_pUnusedI32;
+
+        // Value type.
+        CompType ValueType = CompType::getI32();
+        Type *pValueType = ValueType.GetLLVMType(m_Ctx);
+
+        // Compare value.
+        if (bHasCompare) {
+          LoadOperand(InCompareValue, Inst, uOpCompareValue, CMask::MakeXMask(), ValueType);
+        }
+
+        // Value.
+        LoadOperand(InValue, Inst, uOpValue, CMask::MakeXMask(), ValueType);
+
+        // Create atomic call.
+        Value *pOpRet;
+        if (!bHasCompare) {
+          OP::OpCode OpCode = OP::OpCode::AtomicBinOp;
+          Value *Args[7];
+          Args[0] = m_pOP->GetU32Const((unsigned)OpCode);                         // OpCode
+          Args[1] = InUAV[0];                                                     // Typed (uint/int) UAV handle
+          Args[2] = m_pOP->GetU32Const((unsigned)DXBC::GetAtomicBinOp(Inst.OpCode()));  // Atomic operation kind.
+          Args[3] = pOffset[0];                                                   // Offset 0, in elements
+          Args[4] = pOffset[1];                                                   // Offset 1
+          Args[5] = pOffset[2];                                                   // Offset 2
+          Args[6] = InValue[0];                                                   // New value
+
+          Function *F = m_pOP->GetOpFunc(OpCode, pValueType);
+          pOpRet = m_pBuilder->CreateCall(F, Args);
+        } else {
+          OP::OpCode OpCode = OP::OpCode::AtomicCompareExchange;
+          Value *Args[7];
+          Args[0] = m_pOP->GetU32Const((unsigned)OpCode);                         // OpCode
+          Args[1] = InUAV[0];                                                     // Typed (uint/int) UAV handle
+          Args[2] = pOffset[0];                                                   // Offset 0, in elements
+          Args[3] = pOffset[1];                                                   // Offset 1
+          Args[4] = pOffset[2];                                                   // Offset 2
+          Args[5] = InCompareValue[0];                                            // Compare value
+          Args[6] = InValue[0];                                                   // New value
+
+          Function *F = m_pOP->GetOpFunc(OpCode, pValueType);
+          pOpRet = m_pBuilder->CreateCall(F, Args);
+        }
+
+        // NOTE(review): this is called for non-returning variants too, where
+        // operand 0 is the resource; presumably StoreBroadcastOutput copes
+        // with that -- confirm.
+        StoreBroadcastOutput(Inst, pOpRet, ValueType);
+      } else {
+        const unsigned uOpTGSM = uOpRes;
+        DXASSERT_DXBC(Inst.m_Operands[uOpTGSM].m_Type == D3D11_SB_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY);
+        const TGSMEntry &R = m_TGSMMap[Inst.m_Operands[uOpTGSM].m_Index[0].m_RegIndex];
+        OperandValue InElementOffset, InCompareValue, InValue;
+
+        // Byte offset: one component when the stride is 1, otherwise
+        // (element index, byte offset within element).
+        CMask ElementOffsetMask = CMask::MakeFirstNCompMask(R.Stride == 1 ? 1 : 2);
+        LoadOperand(InElementOffset, Inst, uOpCoord, ElementOffsetMask, CompType::getI32());
+        Value *pByteOffset = InElementOffset[0];
+        if (R.Stride > 1) { // Structured TGSM.
+          Value *pOffset2 = InElementOffset[1];
+          Value *pStride = m_pOP->GetU32Const(R.Stride);
+          pByteOffset = m_pBuilder->CreateAdd(m_pBuilder->CreateMul(pByteOffset, pStride), pOffset2);
+        }
+
+        // Value type.
+        CompType ValueType = CompType::getI32();
+
+        // Compare value.
+        if (bHasCompare) {
+          LoadOperand(InCompareValue, Inst, uOpCompareValue, CMask::MakeXMask(), ValueType);
+        }
+
+        // Pointer type the atomic operates on: i32 in the TGSM address space.
+        Type *pDstType = Type::getInt32PtrTy(m_Ctx, DXIL::kTGSMAddrSpace);
+
+        // Value.
+        LoadOperand(InValue, Inst, uOpValue, CMask::MakeXMask(), ValueType);
+
+        // Create GEP at the byte offset into the TGSM variable, then cast the
+        // resulting pointer to the i32 pointer type.
+        Value *pGEPIndices[2] = { m_pOP->GetU32Const(0), pByteOffset };
+        Value *pPtrI8 = m_pBuilder->CreateGEP(R.pVar, pGEPIndices);
+        Value *pPtr = m_pBuilder->CreatePointerCast(pPtrI8, pDstType);
+
+        // Generate atomic instruction.
+        Value *pRetVal;
+        if (!bHasCompare) {
+          pRetVal = m_pBuilder->CreateAtomicRMW(DXBC::GetLlvmAtomicBinOp(Inst.OpCode()), pPtr, InValue[0], AtomicOrdering::Monotonic);
+        } else {
+          pRetVal = m_pBuilder->CreateAtomicCmpXchg(pPtr, InCompareValue[0], InValue[0], AtomicOrdering::Monotonic, AtomicOrdering::Monotonic);
+          // Keep only field 0 of the cmpxchg result.
+          pRetVal = m_pBuilder->CreateExtractValue(pRetVal, 0);
+        }
+
+        StoreBroadcastOutput(Inst, pRetVal, ValueType);
+      }
+
+      break;
+    }
+
+    case D3D10_1_SB_OPCODE_GATHER4:
+    case D3DWDDM1_3_SB_OPCODE_GATHER4_FEEDBACK: {
+      // Emits a TextureGather call for gather4 and its feedback variant.
+      OP::OpCode OpCode = OP::OpCode::TextureGather;
+      bool bHasFeedback = DXBC::HasFeedback(Inst.OpCode());
+      const unsigned uOpOutput = 0;
+      // NOTE(review): uOpCoord is not referenced again in this case.
+      const unsigned uOpCoord = uOpOutput + 1 + (bHasFeedback ? 1 : 0);
+      const unsigned uOpSRV = DXBC::GetResourceSlot(Inst.OpCode());
+      const unsigned uOpSampler = uOpSRV + 1;
+      const DxilResource &R = GetSRVFromOperand(Inst, uOpSRV);
+      Value *Args[10];
+
+      // Presumably fills the shared handle/coordinate/offset slots of Args;
+      // the slots written explicitly below are 0, 7, 8 (conditionally) and 9.
+      LoadCommonSampleInputs(Inst, &Args[0]);
+
+      // Other arguments.
+      Args[0] = m_pOP->GetU32Const((unsigned)OpCode);
+      // Offset: only kept for Texture2D and Texture2DArray; for any other
+      // resource kind both offset slots are replaced with the unused-i32 value.
+      bool bUseOffset = (R.GetKind() == DxilResource::Kind::Texture2D) || 
+                        (R.GetKind() == DxilResource::Kind::Texture2DArray);
+      if (!bUseOffset) {
+        Args[7] = m_pUnusedI32;
+        Args[8] = m_pUnusedI32;
+      }
+      // Channel: taken from the component selected on the sampler operand.
+      unsigned uChannel = Inst.m_Operands[uOpSampler].m_ComponentName;
+      Args[9] = m_pOP->GetU32Const(uChannel);
+
+      // Function call. The overload type is the resource's base component
+      // type combined with the output operand's min-precision.
+      CompType DstType = DXBC::GetCompTypeWithMinPrec(R.GetCompType().GetBaseCompType(), Inst.m_Operands[uOpOutput].m_MinPrecision);
+      Type *pDstType = DstType.GetLLVMType(m_Ctx);
+      Function *F = m_pOP->GetOpFunc(OpCode, pDstType);
+      Value *pOpRet = m_pBuilder->CreateCall(F, Args);
+
+      StoreResRetOutputAndStatus(Inst, pOpRet, DstType);
+      break;
+    }
+
+    case D3D11_SB_OPCODE_GATHER4_C:
+    case D3DWDDM1_3_SB_OPCODE_GATHER4_C_FEEDBACK: {
+      // Emits a TextureGatherCmp call for gather4_c and its feedback variant.
+      // Same argument layout as the plain gather plus a comparison value in
+      // the last slot.
+      OP::OpCode OpCode = OP::OpCode::TextureGatherCmp;
+      bool bHasFeedback = DXBC::HasFeedback(Inst.OpCode());
+      const unsigned uOpOutput = 0;
+      // NOTE(review): uOpCoord is not referenced again in this case.
+      const unsigned uOpCoord = uOpOutput + 1 + (bHasFeedback ? 1 : 0);
+      const unsigned uOpSRV = DXBC::GetResourceSlot(Inst.OpCode());
+      const unsigned uOpSampler = uOpSRV + 1;
+      const unsigned uOpCmp = uOpSampler + 1;
+      const DxilResource &R = GetSRVFromOperand(Inst, uOpSRV);
+      Value *Args[11];
+
+      // Presumably fills the shared handle/coordinate/offset slots of Args;
+      // the slots written explicitly below are 0, 7, 8 (conditionally), 9, 10.
+      LoadCommonSampleInputs(Inst, &Args[0]);
+
+      // Other arguments.
+      Args[0] = m_pOP->GetU32Const((unsigned)OpCode);
+      // Offset: only kept for Texture2D and Texture2DArray; for any other
+      // resource kind both offset slots are replaced with the unused-i32 value.
+      bool bUseOffset = (R.GetKind() == DxilResource::Kind::Texture2D) || 
+                        (R.GetKind() == DxilResource::Kind::Texture2DArray);
+      if (!bUseOffset) {
+        Args[7] = m_pUnusedI32;
+        Args[8] = m_pUnusedI32;
+      }
+      // Channel: taken from the component selected on the sampler operand.
+      unsigned uChannel = Inst.m_Operands[uOpSampler].m_ComponentName;
+      Args[9] = m_pOP->GetU32Const(uChannel);
+      // Comparison value: X component of the operand after the sampler, as f32.
+      OperandValue InCmp;
+      LoadOperand(InCmp, Inst, uOpCmp, CMask::MakeXMask(), CompType::getF32());
+      Args[10] = InCmp[0];
+
+      // Function call. The overload type is the resource's base component
+      // type combined with the output operand's min-precision.
+      CompType DstType = DXBC::GetCompTypeWithMinPrec(R.GetCompType().GetBaseCompType(), Inst.m_Operands[uOpOutput].m_MinPrecision);
+      Type *pDstType = DstType.GetLLVMType(m_Ctx);
+      Function *F = m_pOP->GetOpFunc(OpCode, pDstType);
+      Value *pOpRet = m_pBuilder->CreateCall(F, Args);
+
+      StoreResRetOutputAndStatus(Inst, pOpRet, DstType);
+      break;
+    }
+
+    case D3D11_SB_OPCODE_GATHER4_PO:
+    case D3DWDDM1_3_SB_OPCODE_GATHER4_PO_FEEDBACK: {
+      // Emits a TextureGather call for gather4_po and its feedback variant:
+      // the offset comes from an instruction operand (the one right after the
+      // coordinate operand) instead of being conditionally cleared.
+      OP::OpCode OpCode = OP::OpCode::TextureGather;
+      bool bHasFeedback = DXBC::HasFeedback(Inst.OpCode());
+      const unsigned uOpOutput = 0;
+      const unsigned uOpCoord = uOpOutput + 1 + (bHasFeedback ? 1 : 0);
+      const unsigned uOpOffset = uOpCoord + 1;
+      const unsigned uOpSRV = DXBC::GetResourceSlot(Inst.OpCode());
+      const unsigned uOpSampler = uOpSRV + 1;
+      const DxilResource &R = GetSRVFromOperand(Inst, uOpSRV);
+      Value *Args[10];
+
+      // NOTE(review): the third argument (false) presumably tells the helper
+      // not to fill the offset slots, since they are written below -- confirm.
+      LoadCommonSampleInputs(Inst, &Args[0], false);
+
+      // Other arguments.
+      Args[0] = m_pOP->GetU32Const((unsigned)OpCode);
+      // Programmable offset: first two components of the offset operand, as i32.
+      OperandValue InOffset;
+      LoadOperand(InOffset, Inst, uOpOffset, CMask::MakeFirstNCompMask(2), CompType::getI32());
+      Args[7] = InOffset[0];
+      Args[8] = InOffset[1];
+      // Channel: taken from the component selected on the sampler operand.
+      unsigned uChannel = Inst.m_Operands[uOpSampler].m_ComponentName;
+      Args[9] = m_pOP->GetU32Const(uChannel);
+
+      // Function call. The overload type is the resource's base component
+      // type combined with the output operand's min-precision.
+      CompType DstType = DXBC::GetCompTypeWithMinPrec(R.GetCompType().GetBaseCompType(), Inst.m_Operands[uOpOutput].m_MinPrecision);
+      Type *pDstType = DstType.GetLLVMType(m_Ctx);
+      Function *F = m_pOP->GetOpFunc(OpCode, pDstType);
+      Value *pOpRet = m_pBuilder->CreateCall(F, Args);
+
+      StoreResRetOutputAndStatus(Inst, pOpRet, DstType);
+      break;
+    }
+
+    case D3D11_SB_OPCODE_GATHER4_PO_C:
+    case D3DWDDM1_3_SB_OPCODE_GATHER4_PO_C_FEEDBACK: {
+      // Emits a TextureGatherCmp call for gather4_po_c and its feedback
+      // variant: operand-supplied offset plus a comparison value.
+      OP::OpCode OpCode = OP::OpCode::TextureGatherCmp;
+      bool bHasFeedback = DXBC::HasFeedback(Inst.OpCode());
+      const unsigned uOpOutput = 0;
+      const unsigned uOpCoord = uOpOutput + 1 + (bHasFeedback ? 1 : 0);
+      const unsigned uOpOffset = uOpCoord + 1;
+      const unsigned uOpSRV = DXBC::GetResourceSlot(Inst.OpCode());
+      const unsigned uOpSampler = uOpSRV + 1;
+      const unsigned uOpCmp = uOpSampler + 1;
+      const DxilResource &R = GetSRVFromOperand(Inst, uOpSRV);
+      Value *Args[11];
+
+      // NOTE(review): the third argument (false) presumably tells the helper
+      // not to fill the offset slots, since they are written below -- confirm.
+      LoadCommonSampleInputs(Inst, &Args[0], false);
+
+      // Other arguments.
+      Args[0] = m_pOP->GetU32Const((unsigned)OpCode);
+      // Programmable offset: first two components of the offset operand, as i32.
+      OperandValue InOffset;
+      LoadOperand(InOffset, Inst, uOpOffset, CMask::MakeFirstNCompMask(2), CompType::getI32());
+      Args[7] = InOffset[0];
+      Args[8] = InOffset[1];
+      // Channel: taken from the component selected on the sampler operand.
+      unsigned uChannel = Inst.m_Operands[uOpSampler].m_ComponentName;
+      Args[9] = m_pOP->GetU32Const(uChannel);
+      // Comparison value: X component of the operand after the sampler, as f32.
+      OperandValue InCmp;
+      LoadOperand(InCmp, Inst, uOpCmp, CMask::MakeXMask(), CompType::getF32());
+      Args[10] = InCmp[0];
+
+      // Function call. The overload type is the resource's base component
+      // type combined with the output operand's min-precision.
+      CompType DstType = DXBC::GetCompTypeWithMinPrec(R.GetCompType().GetBaseCompType(), Inst.m_Operands[uOpOutput].m_MinPrecision);
+      Type *pDstType = DstType.GetLLVMType(m_Ctx);
+      Function *F = m_pOP->GetOpFunc(OpCode, pDstType);
+      Value *pOpRet = m_pBuilder->CreateCall(F, Args);
+
+      StoreResRetOutputAndStatus(Inst, pOpRet, DstType);
+      break;
+    }
+
+    case D3D10_1_SB_OPCODE_SAMPLE_POS: {
+      // Queries a sample position, either from a resource operand or, for any
+      // other operand type, from the render target.
+      // Operand layout: [0] output, [1] resource or rasterizer, [2] sample index.
+      const unsigned uOpOutput = 0;
+      const unsigned uOpResOrRast = uOpOutput + 1;
+      const unsigned uOpSample = uOpResOrRast + 1;
+
+      // Sample index: X component, as i32; shared by both paths.
+      OperandValue InSample;
+      LoadOperand(InSample, Inst, uOpSample, CMask::MakeXMask(), CompType::getI32());
+      Value *pOpRet;
+
+      if (Inst.m_Operands[uOpResOrRast].m_Type == D3D10_SB_OPERAND_TYPE_RESOURCE) {
+        // Resource.
+        OP::OpCode OpCode = OP::OpCode::Texture2DMSGetSamplePosition;
+        OperandValue InRes;
+        LoadOperand(InRes, Inst, uOpResOrRast, CMask::MakeXMask(), CompType::getInvalid());
+
+        // Create SamplePosition call.
+        Value *Args[3];
+        Args[0] = m_pOP->GetU32Const((unsigned)OpCode);   // OpCode
+        Args[1] = InRes[0];                               // Resource handle
+        Args[2] = InSample[0];                            // Sample index
+
+        Function *F = m_pOP->GetOpFunc(OpCode, Type::getVoidTy(m_Ctx));
+        pOpRet = m_pBuilder->CreateCall(F, Args);
+      } else {
+        // Render target.
+        OP::OpCode OpCode = OP::OpCode::RenderTargetGetSamplePosition;
+
+        // Create SamplePosition call.
+        Value *Args[2];
+        Args[0] = m_pOP->GetU32Const((unsigned)OpCode);   // OpCode
+        Args[1] = InSample[0];                            // Sample index
+
+        Function *F = m_pOP->GetOpFunc(OpCode, Type::getVoidTy(m_Ctx));
+        pOpRet = m_pBuilder->CreateCall(F, Args);
+      }
+
+      // Both paths hand the call result to the same output helper.
+      StoreSamplePosOutput(Inst, pOpRet);
+      break;
+    }
+
+    case D3DWDDM1_3_SB_OPCODE_CHECK_ACCESS_FULLY_MAPPED: {
+      // Emits a CheckAccessFullyMapped call on the status value in operand 1
+      // and broadcasts the result, converted to i32, to the output.
+      OP::OpCode OpCode = OP::OpCode::CheckAccessFullyMapped;
+      OperandValue InStatus;
+      LoadOperand(InStatus, Inst, 1, CMask::MakeXMask(), CompType::getI32());
+
+      // Create CheckAccessFullyMapped call.
+      // NOTE(review): the status is loaded from operand 1 with an X-only mask
+      // but indexed by operand 0's component name -- confirm this index always
+      // refers to a loaded component.
+      Value *Args[2];
+      Args[0] = m_pOP->GetU32Const((unsigned)OpCode);           // OpCode
+      Args[1] = InStatus[Inst.m_Operands[0].m_ComponentName];   // Status
+
+      Function *F = m_pOP->GetOpFunc(OpCode, Type::getInt32Ty(m_Ctx));
+      Value *pRetValue = m_pBuilder->CreateCall(F, Args);
+      // Convert the i1 result to the i32 representation stored in the output.
+      pRetValue = CastDxbcValue(pRetValue, CompType::getI1(), CompType::getI32());
+
+      StoreBroadcastOutput(Inst, pRetValue, CompType::getI32());
+      break;
+    }
+
+    case D3D10_SB_OPCODE_RESINFO: {
+      // Emits a GetDimensions call for a resource at a given mip level.
+      // Operand layout: [0] output, [1] mip level, [2] resource.
+      OP::OpCode OpCode = OP::OpCode::GetDimensions;
+      const unsigned uOpOutput = 0;
+      const unsigned uOpMipLevel = uOpOutput + 1;
+      const unsigned uOpRes = uOpMipLevel + 1;
+
+      // MipLevel: X component, as i32.
+      OperandValue InMipLevel;
+      LoadOperand(InMipLevel, Inst, uOpMipLevel, CMask::MakeXMask(), CompType::getI32());
+
+      // Resource.
+      OperandValue InRes;
+      LoadOperand(InRes, Inst, uOpRes, CMask::MakeXMask(), CompType::getInvalid());
+
+      // Create GetDimensions call.
+      Value *Args[3];
+      Args[0] = m_pOP->GetU32Const((unsigned)OpCode);             // OpCode
+      Args[1] = InRes[0];                               // Resource handle
+      Args[2] = InMipLevel[0];                          // MipLevel
+
+      Function *F = m_pOP->GetOpFunc(OpCode, Type::getVoidTy(m_Ctx));
+      Value *pOpRet = m_pBuilder->CreateCall(F, Args);
+
+      // Distribution of the result to the output components is handled by
+      // the helper.
+      StoreGetDimensionsOutput(Inst, pOpRet);
+      break;
+    }
+
+    case D3D11_SB_OPCODE_BUFINFO: {
+      // Emits a GetDimensions call for a buffer and broadcasts field 0 of the
+      // result to the output.
+      // Operand layout: [0] output, [1] resource.
+      OP::OpCode OpCode = OP::OpCode::GetDimensions;
+      const unsigned uOpOutput = 0;
+      const unsigned uOpRes = uOpOutput + 1;
+
+      // Resource.
+      OperandValue InRes;
+      LoadOperand(InRes, Inst, uOpRes, CMask::MakeXMask(), CompType::getInvalid());
+
+      // Create GetDimensions call.
+      Value *Args[3];
+      Args[0] = m_pOP->GetU32Const((unsigned)OpCode);     // OpCode
+      Args[1] = InRes[0];                                 // Resource handle
+      Args[2] = m_pUnusedI32;                             // MipLevel (undefined)
+
+      Function *F = m_pOP->GetOpFunc(OpCode, Type::getVoidTy(m_Ctx));
+      Value *pOpRet = m_pBuilder->CreateCall(F, Args);
+      // Only field 0 of the returned aggregate is used.
+      Value *pOpWidth = m_pBuilder->CreateExtractValue(pOpRet, 0);
+
+      // Store output.
+      StoreBroadcastOutput(Inst, pOpWidth, CompType::getI32());
+      break;
+    }
+
+    case D3D10_1_SB_OPCODE_SAMPLE_INFO: {
+      // Queries the sample count of a resource or of the render target and
+      // writes it to the output as float or uint.
+      // Operand layout: [0] output, [1] resource or rasterizer.
+      const unsigned uOpOutput = 0;
+      const unsigned uOpResOrRast = uOpOutput + 1;
+
+      // The result is float unless the instruction explicitly requests uint.
+      bool bDxbcRetFloat = true;
+      if (Inst.m_InstructionReturnType == D3D10_SB_INSTRUCTION_RETURN_UINT) {
+        bDxbcRetFloat = false;
+      }
+
+      // Return type.
+      CompType DstType;
+      if (bDxbcRetFloat) {
+        DstType = DXBC::GetCompTypeWithMinPrec(CompType::getF32(), Inst.m_Operands[uOpOutput].m_MinPrecision);
+      } else {
+        DstType = DXBC::GetCompTypeWithMinPrec(CompType::getI32(), Inst.m_Operands[uOpOutput].m_MinPrecision);
+      }
+
+      Value *pRetValue;
+
+      if (Inst.m_Operands[uOpResOrRast].m_Type == D3D10_SB_OPERAND_TYPE_RESOURCE) {
+        // Resource.
+        OP::OpCode OpCode = OP::OpCode::GetDimensions;
+
+        OperandValue InRes;
+        LoadOperand(InRes, Inst, uOpResOrRast, CMask::MakeXMask(), CompType::getInvalid());
+
+        // Create GetDimensions call.
+        Value *Args[3];
+        Args[0] = m_pOP->GetU32Const((unsigned)OpCode);             // OpCode
+        Args[1] = InRes[0];                               // Resource handle
+        Args[2] = m_pOP->GetU32Const(0);                  // MipLevel
+
+        Function *F = m_pOP->GetOpFunc(OpCode, Type::getVoidTy(m_Ctx));
+        Value *pOpRet = m_pBuilder->CreateCall(F, Args);
+        // Field 3 of the GetDimensions result is used as the sample count.
+        pRetValue = m_pBuilder->CreateExtractValue(pOpRet, 3);
+      } else {
+        // Any non-resource operand: query the render target's sample count.
+        OP::OpCode OpCode = OP::OpCode::RenderTargetGetSampleCount;
+
+        // Create SampleCount call.
+        Value *Args[1];
+        Args[0] = m_pOP->GetU32Const((unsigned)OpCode);             // OpCode
+
+        Function *F = m_pOP->GetOpFunc(OpCode, Type::getVoidTy(m_Ctx));
+        pRetValue = m_pBuilder->CreateCall(F, Args);
+      }
+
+      // Convert the count to float when a float result is requested, and pick
+      // a zero constant of the matching type.
+      Value *pZeroValue;
+      if (bDxbcRetFloat) {
+        pRetValue = m_pBuilder->CreateCast(Instruction::CastOps::UIToFP, pRetValue, Type::getFloatTy(m_Ctx));
+        pZeroValue = m_pOP->GetFloatConst(0.f);
+      } else {
+        pZeroValue = m_pOP->GetU32Const(0);
+      }
+
+      // Store output.
+      CMask OutputMask = CMask::FromDXBC(Inst.m_Operands[uOpOutput].m_WriteMask);
+      if (!OutputMask.IsZero()) {
+        OperandValue Out;
+        for (BYTE c = 0; c < DXBC::kWidth; c++) {
+          if (!OutputMask.IsSet(c)) continue;
+
+          // The source operand's swizzle decides what each written component
+          // receives: the sample count where it selects component 0, zero
+          // everywhere else.
+          BYTE Comp = Inst.m_Operands[uOpResOrRast].m_Swizzle[c];
+          if (Comp == 0) {
+            Out[c] = pRetValue;
+          } else {
+            Out[c] = pZeroValue;
+          }
+        }
+        StoreOperand(Out, Inst, uOpOutput, OutputMask, DstType);
+      }
+      break;
+    }
+
+    case D3D11_SB_OPCODE_IMM_ATOMIC_ALLOC:
+    case D3D11_SB_OPCODE_IMM_ATOMIC_CONSUME: {
+      // Emits a BufferUpdateCounter call on a UAV: +1 for ALLOC, -1 for
+      // CONSUME. The call result is broadcast to the output.
+      // Operand layout: [0] output, [1] UAV.
+      OP::OpCode OpCode = OP::OpCode::BufferUpdateCounter;
+      const unsigned uOpOutput = 0;
+      const unsigned uOpUAV = uOpOutput + 1;
+      bool bInc = Inst.OpCode() == D3D11_SB_OPCODE_IMM_ATOMIC_ALLOC;
+
+      // Resource.
+      OperandValue InRes;
+      LoadOperand(InRes, Inst, uOpUAV, CMask::MakeXMask(), CompType::getInvalid());
+
+      // Create BufferUpdateCounter call.
+      Value *Args[3];
+      Args[0] = m_pOP->GetU32Const((unsigned)OpCode);             // OpCode
+      Args[1] = InRes[0];                               // Resource handle
+      Args[2] = m_pOP->GetI8Const(bInc ? 1 : -1);       // Inc or Dec
+
+      // Output type: i32 adjusted by the output operand's min-precision.
+      CompType DstType = DXBC::GetCompTypeWithMinPrec(CompType::getI32(), Inst.m_Operands[uOpOutput].m_MinPrecision);
+      Function *F = m_pOP->GetOpFunc(OpCode, Type::getVoidTy(m_Ctx));
+      Value *pOpRet = m_pBuilder->CreateCall(F, Args);
+
+      StoreBroadcastOutput(Inst, pOpRet, DstType);
+      break;
+    }
+
+    case D3D11_SB_OPCODE_SYNC: {
+      // Emits a Barrier call whose mode is derived from the instruction's
+      // four sync flags.
+      OP::OpCode OpCode = OP::OpCode::Barrier;
+      DXIL::BarrierMode BMode = DXBC::GetBarrierMode(Inst.m_SyncFlags.bThreadsInGroup,
+                                                     Inst.m_SyncFlags.bUnorderedAccessViewMemoryGlobal,
+                                                     Inst.m_SyncFlags.bUnorderedAccessViewMemoryGroup,
+                                                     Inst.m_SyncFlags.bThreadGroupSharedMemory);
+
+      // Create Barrier call.
+      Value *Args[2];
+      Args[0] = m_pOP->GetU32Const((unsigned)OpCode);             // OpCode
+      Args[1] = m_pOP->GetU32Const((unsigned)BMode);    // Barrier mode
+
+      Function *F = m_pOP->GetOpFunc(OpCode, Type::getVoidTy(m_Ctx));
+      MarkPrecise(m_pBuilder->CreateCall(F, Args));
+      break;
+    }
+
+      //
+      // Control-flow operations.
+      //
+    case D3D10_SB_OPCODE_IF: {
+      // Opens an If-scope: records the condition, starts emitting into a new
+      // "then" block and pre-creates the "end" block. The conditional branch
+      // itself is not emitted here; the ENDIF case emits it.
+      DXASSERT_DXBC(Inst.m_NumOperands == 1);
+      // Create If-scope.
+      Scope &Scope = m_ScopeStack.Push(Scope::If, m_pBuilder->GetInsertBlock());
+
+      // Prepare condition.
+      Scope.pCond = LoadZNZCondition(Inst, 0);
+
+      // Create then-branch BB and set it as active.
+      Scope.pThenBB = BasicBlock::Create(m_Ctx, Twine("if") + Twine(Scope.NameIndex) + Twine(".then"), pFunction);
+      m_pBuilder->SetInsertPoint(Scope.pThenBB);
+
+      // Create endif BB. It is created without a parent function here; the
+      // ENDIF case appends it to the function.
+      Scope.pPostScopeBB = BasicBlock::Create(m_Ctx, Twine("if") + Twine(Scope.NameIndex) + Twine(".end"));
+      break;
+    }
+
+    case D3D10_SB_OPCODE_ELSE: {
+      // Closes the "then" branch of the innermost If-scope and starts
+      // emitting into a new "else" block. Fails with E_FAIL when the
+      // innermost scope is not an If-scope.
+      // Get If-scope.
+      Scope &Scope = m_ScopeStack.Top();
+      IFTBOOL(Scope.Kind == Scope::If, E_FAIL);
+
+      // Terminate then-branch.
+      CreateBranchIfNeeded(m_pBuilder->GetInsertBlock(), Scope.pPostScopeBB);
+
+      // Create else-branch BB and set it as active.
+      Scope.pElseBB = BasicBlock::Create(m_Ctx, Twine("if") + Twine(Scope.NameIndex) + Twine(".else"), pFunction);
+      m_pBuilder->SetInsertPoint(Scope.pElseBB);
+      break;
+    }
+
+    case D3D10_SB_OPCODE_ENDIF: {
+      // Closes the innermost If-scope: terminates the current branch, emits
+      // the deferred conditional branch in the scope's pre-scope block, and
+      // continues emission in the "end" block. Fails with E_FAIL when the
+      // innermost scope is not an If-scope.
+      // Get If-scope.
+      Scope &Scope = m_ScopeStack.Top();
+      IFTBOOL(Scope.Kind == Scope::If, E_FAIL);
+
+      // Terminate else-branch (or the then-branch when there was no ELSE).
+      CreateBranchIfNeeded(m_pBuilder->GetInsertBlock(), Scope.pPostScopeBB);
+
+      // Insert IF cbranch.
+      // pPreScopeBB is presumably the block that was active when the scope
+      // was pushed -- confirm against ScopeStack::Push.
+      m_pBuilder->SetInsertPoint(Scope.pPreScopeBB);
+      if (Scope.pElseBB != nullptr) {
+        m_pBuilder->CreateCondBr(Scope.pCond, Scope.pThenBB, Scope.pElseBB);
+      } else {
+        // No else-branch: the false edge goes straight to the end block.
+        m_pBuilder->CreateCondBr(Scope.pCond, Scope.pThenBB, Scope.pPostScopeBB);
+      }
+
+      // Set endif BB as active.
+      pFunction->getBasicBlockList().push_back(Scope.pPostScopeBB);
+      m_pBuilder->SetInsertPoint(Scope.pPostScopeBB);
+
+      // Finish If-scope.
+      m_ScopeStack.Pop();
+      break;
+    }
+
+    case D3D10_SB_OPCODE_LOOP: {
+      DXASSERT_DXBC(Inst.m_NumOperands == 0);
+      // Create Loop-scope.
+      Scope &Scope = m_ScopeStack.Push(Scope::Loop, m_pBuilder->GetInsertBlock());
+
+      // Create Loop and EndLoop BBs.
+      Scope.pLoopBB = BasicBlock::Create(m_Ctx, Twine("loop") + Twine(Scope.NameIndex), pFunction);
+      Scope.pPostScopeBB = BasicBlock::Create(m_Ctx, Twine("loop") + Twine(Scope.NameIndex) + Twine(".end"));
+
+      // Insert branch to Loop BB.
+      m_pBuilder->CreateBr(Scope.pLoopBB);
+
+      // Set Loop BB as active.
+      m_pBuilder->SetInsertPoint(Scope.pLoopBB);
+      break;
+    }
+
+    case D3D10_SB_OPCODE_ENDLOOP: {
+      // Get Loop-scope.
+      Scope &Scope = m_ScopeStack.Top();
+      IFTBOOL(Scope.Kind == Scope::Loop, E_FAIL);
+
+      // Insert back-edge.
+      CreateBranchIfNeeded(m_pBuilder->GetInsertBlock(), Scope.pLoopBB);
+
+      // Set EndLoop BB as active.
+      pFunction->getBasicBlockList().push_back(Scope.pPostScopeBB);
+      m_pBuilder->SetInsertPoint(Scope.pPostScopeBB);
+
+      // Finish Loop-scope.
+      m_ScopeStack.Pop();
+      break;
+    }
+
+    case D3D10_SB_OPCODE_SWITCH: {
+      DXASSERT_DXBC(Inst.m_NumOperands == 1);
+      // Create Switch-scope.
+      Scope &Scope = m_ScopeStack.Push(Scope::Switch, m_pBuilder->GetInsertBlock());
+
+      // Prepare selector.
+      BYTE Comp = (BYTE)Inst.m_Operands[0].m_ComponentName;
+      CMask ReadMask = CMask::MakeCompMask(Comp);
+      OperandValue In1;
+      LoadOperand(In1, Inst, 0, ReadMask, CompType::getI32());
+      Scope.pSelector = In1[Comp];
+
+      // Create 1st casegroup BB and set it as active.
+      BasicBlock *pBB = BasicBlock::Create(m_Ctx, Twine("switch") + Twine(Scope.NameIndex) + 
+                                                  Twine(".casegroup") + Twine(Scope.CaseGroupIndex++), pFunction);
+      m_pBuilder->SetInsertPoint(pBB);
+
+      // Create endswitch BB.
+      Scope.pPostScopeBB = BasicBlock::Create(m_Ctx, Twine("switch") + Twine(Scope.NameIndex) + Twine(".end"));
+      break;
+    }
+
+    case D3D10_SB_OPCODE_CASE: {
+      DXASSERT_DXBC(Inst.m_NumOperands == 1);
+      // Get Switch-scope.
+      Scope &Scope = m_ScopeStack.Top();
+      IFTBOOL(Scope.Kind == Scope::Switch, E_FAIL);
+
+      // Retrieve selector value.
+      const D3D10ShaderBinary::COperandBase &O = Inst.m_Operands[0];
+      DXASSERT_DXBC(O.m_Type == D3D10_SB_OPERAND_TYPE_IMMEDIATE32 && O.m_NumComponents == D3D10_SB_OPERAND_1_COMPONENT);
+      int CaseValue = O.m_Value[0];
+
+      // Remember case clause.
+      pair<unsigned, BasicBlock*> Case(CaseValue, m_pBuilder->GetInsertBlock());
+      Scope.SwitchCases.emplace_back(Case);
+      break;
+    }
+
+    case D3D10_SB_OPCODE_DEFAULT: {
+      DXASSERT_DXBC(Inst.m_NumOperands == 0);
+      // Get Switch-scope.
+      Scope &Scope = m_ScopeStack.Top();
+      IFTBOOL(Scope.Kind == Scope::Switch, E_FAIL);
+
+      // Remember default clause.
+      Scope.pDefaultBB = m_pBuilder->GetInsertBlock();
+      break;
+    }
+
+    case D3D10_SB_OPCODE_ENDSWITCH: {
+      // Get Switch-scope.
+      Scope &Scope = m_ScopeStack.Top();
+      IFTBOOL(Scope.Kind == Scope::Switch, E_FAIL);
+
+      // Terminate case/default BB.
+      CreateBranchIfNeeded(m_pBuilder->GetInsertBlock(), Scope.pPostScopeBB);
+
+      // Insert switch branch.
+      m_pBuilder->SetInsertPoint(Scope.pPreScopeBB);
+      BasicBlock *pDefaultBB = Scope.pDefaultBB != nullptr ? Scope.pDefaultBB : Scope.pPostScopeBB;
+      SwitchInst *pSwitch = m_pBuilder->CreateSwitch(Scope.pSelector, pDefaultBB);
+      for (size_t i = 0; i < Scope.SwitchCases.size(); i++) {
+        auto &Case = Scope.SwitchCases[i];
+        if (Case.second == Scope.pDefaultBB) continue;
+
+        pSwitch->addCase(m_pBuilder->getInt32(Case.first), Case.second);
+      }
+
+      // Rename casegroups BBs.
+      SwitchInst *pSwI = dyn_cast<SwitchInst>(Scope.pPreScopeBB->getTerminator());
+      DXASSERT_NOMSG(pSwI != nullptr);
+      BasicBlock *pPrevCaseBB = nullptr;
+      unsigned CaseGroupIdx = 0;
+      for (auto itCase = pSwI->case_begin(), endCase = pSwI->case_end(); itCase != endCase; ++itCase) {
+        BasicBlock *pCaseBB = itCase.getCaseSuccessor();
+        if (pCaseBB != pPrevCaseBB) {
+          pCaseBB->setName(Twine("switch") + Twine(Scope.NameIndex) + Twine(".casegroup") + Twine(CaseGroupIdx++));
+          pPrevCaseBB = pCaseBB;
+        }
+      }
+
+      // Rename default BB.
+      if (Scope.pDefaultBB != nullptr) {
+        Scope.pDefaultBB->setName(Twine("switch") + Twine(Scope.NameIndex) + Twine(".default"));
+      }
+
+      // Set endswitch BB as active.
+      pFunction->getBasicBlockList().push_back(Scope.pPostScopeBB);
+      m_pBuilder->SetInsertPoint(Scope.pPostScopeBB);
+
+      // Finish Switch-scope.
+      m_ScopeStack.Pop();
+      break;
+    }
+
+    case D3D10_SB_OPCODE_CONTINUE: {
+      DXASSERT_DXBC(Inst.m_NumOperands == 0);
+      // Find parent scope.
+      Scope &Scope = m_ScopeStack.FindParentLoop();
+
+      // Create a new basic block.
+      BasicBlock *pNextBB = BasicBlock::Create(m_Ctx, Twine("loop") + Twine(Scope.NameIndex) + 
+                                                      Twine(".continue") + Twine(Scope.ContinueIndex++), pFunction);
+
+      // Insert branch to Loop BB.
+      m_pBuilder->CreateBr(Scope.pLoopBB);
+
+      // Set Next BB as active.
+      m_pBuilder->SetInsertPoint(pNextBB);
+      break;
+    }
+
+    case D3D10_SB_OPCODE_CONTINUEC: {
+      DXASSERT_DXBC(Inst.m_NumOperands == 1);
+
+      // Prepare condition.
+      Value *pCond = LoadZNZCondition(Inst, 0);
+
+      // Find parent scope.
+      Scope &Scope = m_ScopeStack.FindParentLoop();
+
+      // Create a new basic block.
+      BasicBlock *pNextBB = BasicBlock::Create(m_Ctx, Twine("loop") + Twine(Scope.NameIndex) + 
+                                                      Twine(".continuec") + Twine(Scope.ContinueIndex++), pFunction);
+
+      // Insert cbranch to Loop and Next BBs.
+      m_pBuilder->CreateCondBr(pCond, Scope.pLoopBB, pNextBB);
+
+      // Set Next BB as active.
+      m_pBuilder->SetInsertPoint(pNextBB);
+      break;
+    }
+
+    case D3D10_SB_OPCODE_BREAK: {
+      DXASSERT_DXBC(Inst.m_NumOperands == 0);
+      // Find parent scope.
+      Scope &Scope = m_ScopeStack.FindParentLoopOrSwitch();
+
+      // Create a new basic block.
+      BasicBlock *pNextBB;
+      if (Scope.Kind == Scope::Loop) {
+        pNextBB = BasicBlock::Create(m_Ctx, Twine("loop") + Twine(Scope.NameIndex) + 
+                                            Twine(".break") + Twine(Scope.LoopBreakIndex++), pFunction);
+      } else {
+        if (m_ScopeStack.Top().Kind == Scope::Switch) {
+          pNextBB = BasicBlock::Create(m_Ctx, Twine("switch") + Twine(Scope.NameIndex) + 
+                                              Twine(".tmpcasegroup") + Twine(Scope.CaseGroupIndex++), pFunction);
+        } else {
+          pNextBB = BasicBlock::Create(m_Ctx, Twine("switch") + Twine(Scope.NameIndex) + 
+                                              Twine(".break") + Twine(Scope.SwitchBreakIndex++), pFunction);
+        }
+      }
+
+      // Insert branch to PostScope BB.
+      m_pBuilder->CreateBr(Scope.pPostScopeBB);
+
+      // Set Next BB as active.
+      m_pBuilder->SetInsertPoint(pNextBB);
+      break;
+    }
+
+    case D3D10_SB_OPCODE_BREAKC: {
+      DXASSERT_DXBC(Inst.m_NumOperands == 1);
+
+      // Prepare condition.
+      Value *pCond = LoadZNZCondition(Inst, 0);
+
+      // Find parent scope.
+      Scope &Scope = m_ScopeStack.FindParentLoopOrSwitch();
+
+      // Create a new basic block.
+      BasicBlock *pNextBB;
+      if (Scope.Kind == Scope::Loop) {
+        pNextBB = BasicBlock::Create(m_Ctx, Twine("loop") + Twine(Scope.NameIndex) + 
+                                            Twine(".breakc") + Twine(Scope.LoopBreakIndex++), pFunction);
+      } else {
+        pNextBB = BasicBlock::Create(m_Ctx, Twine("switch") + Twine(Scope.NameIndex) + 
+                                            Twine(".break") + Twine(Scope.SwitchBreakIndex++), pFunction);
+      }
+
+      // Insert cbranch to PostScope and Next BB.
+      m_pBuilder->CreateCondBr(pCond, Scope.pPostScopeBB, pNextBB);
+
+      // Set Next BB as active.
+      m_pBuilder->SetInsertPoint(pNextBB);
+      break;
+    }
+
+    case D3D10_SB_OPCODE_LABEL: {
+      DXASSERT_DXBC(Inst.m_NumOperands == 1);
+      DXASSERT_DXBC(Inst.m_Operands[0].m_Type == D3D10_SB_OPERAND_TYPE_LABEL ||
+                    Inst.m_Operands[0].m_Type == D3D11_SB_OPERAND_TYPE_FUNCTION_BODY);
+      unsigned LabelIdx = Inst.m_Operands[0].m_Index[0].m_RegIndex;
+      const bool IsFb = Inst.m_Operands[0].m_Type == D3D11_SB_OPERAND_TYPE_FUNCTION_BODY;
+      auto &Label = IsFb ? m_InterfaceFunctionBodies[LabelIdx] : m_Labels[LabelIdx];
+      // Create entry basic block.
+      pFunction = Label.pFunc;
+      BasicBlock *pBB = BasicBlock::Create(m_Ctx, "entry", pFunction);
+      m_pBuilder->SetInsertPoint(pBB);
+      IFT(m_ScopeStack.IsEmpty());
+      (void)m_ScopeStack.Push(Scope::Function, nullptr);
+      InsertSM50ResourceHandles();
+      break;
+    }
+
+    case D3D10_SB_OPCODE_CALL: {
+      DXASSERT_DXBC(Inst.m_NumOperands == 1);
+      DXASSERT_DXBC(Inst.m_Operands[0].m_Type == D3D10_SB_OPERAND_TYPE_LABEL);
+      unsigned LabelIdx = Inst.m_Operands[0].m_Index[0].m_RegIndex;
+      auto &Label = m_Labels[LabelIdx];
+      // Create call instruction.
+      m_pBuilder->CreateCall(Label.pFunc);
+      break;
+    }
+
+    case D3D11_SB_OPCODE_INTERFACE_CALL: {
+      DXASSERT_DXBC(Inst.m_Operands[0].m_Type == D3D11_SB_OPERAND_TYPE_INTERFACE);
+      DXASSERT_DXBC(Inst.m_Operands[0].m_IndexDimension == D3D10_SB_OPERAND_INDEX_2D);
+      DXASSERT_DXBC(Inst.m_Operands[0].m_IndexType[0] == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
+      unsigned BaseIfaceIdx = Inst.m_Operands[0].m_Index[0].m_RegIndex;
+      unsigned CallSiteIdx = Inst.m_InterfaceCall.FunctionIndex;
+      Interface& Iface = m_Interfaces[BaseIfaceIdx];
+      DXASSERT_DXBC(Inst.m_Operands[0].m_IndexType[0] == D3D10_SB_OPERAND_INDEX_IMMEDIATE32 || Iface.bDynamicallyIndexed);
+
+      Value* pIfaceArrayIdx = LoadOperandIndex(Inst.m_Operands[0].m_Index[1], Inst.m_Operands[0].m_IndexType[1]);
+      Value* pIfaceIdx = m_pBuilder->CreateAdd(m_pOP->GetU32Const(BaseIfaceIdx), pIfaceArrayIdx);
+
+      // Load function table index
+      Value *pCBufferRetValue;
+      {
+        Value *Args[3];
+        Args[0] = m_pOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy);  // OpCode
+        Args[1] = CreateHandle(m_pInterfaceDataBuffer->GetClass(),
+                               m_pInterfaceDataBuffer->GetID(),
+                               m_pOP->GetU32Const(m_pInterfaceDataBuffer->GetLowerBound()),
+                               false /*Nonuniform*/);                           // CBuffer handle
+        Args[2] = pIfaceIdx;                                                    // 0-based index into cbuffer instance
+        Function *pCBufferLoadFunc = m_pOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, Type::getInt32Ty(m_Ctx));
+
+        pCBufferRetValue = m_pBuilder->CreateCall(pCBufferLoadFunc, Args);
+        pCBufferRetValue = m_pBuilder->CreateExtractValue(pCBufferRetValue, 0);
+      }
+
+      // Switch on function table index
+      // Create endswitch BB.
+      BasicBlock* pPostSwitchBB = BasicBlock::Create(m_Ctx, Twine("fcall") + Twine(m_FcallCount) + Twine(".end"));
+      SwitchInst* pSwitch = m_pBuilder->CreateSwitch(pCBufferRetValue, pPostSwitchBB);
+      for (unsigned caseIdx = 0; caseIdx < Iface.Tables.size(); ++caseIdx) {
+        BasicBlock* pCaseBB = BasicBlock::Create(m_Ctx, Twine("fcall") + Twine(m_FcallCount) +
+                                                        Twine(".case") + Twine(caseIdx), pFunction);
+        m_pBuilder->SetInsertPoint(pCaseBB);
+
+        unsigned fbIdx = m_FunctionTables[Iface.Tables[caseIdx]][CallSiteIdx];
+        m_pBuilder->CreateCall(m_InterfaceFunctionBodies[fbIdx].pFunc);
+        m_pBuilder->CreateBr(pPostSwitchBB);
+
+        pSwitch->addCase(m_pBuilder->getInt32(Iface.Tables[caseIdx]), pCaseBB);
+      }
+
+      pFunction->getBasicBlockList().push_back(pPostSwitchBB);
+      m_pBuilder->SetInsertPoint(pPostSwitchBB);
+      ++m_FcallCount;
+      break;
+    }
+
+    case D3D10_SB_OPCODE_CALLC: {
+      DXASSERT_DXBC(Inst.m_NumOperands == 2);
+      DXASSERT_DXBC(Inst.m_Operands[1].m_Type == D3D10_SB_OPERAND_TYPE_LABEL);
+      unsigned LabelIdx = Inst.m_Operands[1].m_Index[0].m_RegIndex;
+      auto &Label = m_Labels[LabelIdx];
+
+      // Prepare condition.
+      Value *pCond = LoadZNZCondition(Inst, 0);
+
+      // Create call and after-call BBs.
+      Function *pCurFunc = m_pBuilder->GetInsertBlock()->getParent();
+      BasicBlock *pCallBB = BasicBlock::Create(m_Ctx, Twine("label") + Twine(LabelIdx) + Twine(".callc"), pCurFunc);
+      BasicBlock *pPostCallBB = BasicBlock::Create(m_Ctx, Twine("label") + Twine(LabelIdx) + Twine(".callc"), pCurFunc);
+
+      // Create cbranch for callc.
+      m_pBuilder->CreateCondBr(pCond, pCallBB, pPostCallBB);
+      m_pBuilder->SetInsertPoint(pCallBB);
+
+      // Create call.
+      m_pBuilder->CreateCall(Label.pFunc);
+      m_pBuilder->CreateBr(pPostCallBB);
+      m_pBuilder->SetInsertPoint(pPostCallBB);
+      break;
+    }
+
+    case D3D10_SB_OPCODE_RET: {
+      // Find parent scope.
+      Scope &FuncScope = m_ScopeStack.FindParentFunction();
+
+      if ((FuncScope.IsEntry() && !m_bPatchConstantPhase) || !FuncScope.IsEntry()) {
+        m_pBuilder->CreateRetVoid();
+        BasicBlock *pAfterRet = BasicBlock::Create(m_Ctx, Twine("afterret"), pFunction);
+        m_pBuilder->SetInsertPoint(pAfterRet);
+      } else {
+        // Hull shader control point phase fork/join.
+        Scope &HullScope = m_ScopeStack.FindParentHullLoop();
+        BasicBlock *pAfterRet = BasicBlock::Create(m_Ctx, Twine("afterret"), pFunction);
+
+        if (m_ScopeStack.Top().Kind == Scope::HullLoop) {
+          bMustCloseHullLoop = true;
+          m_pBuilder->CreateBr(pAfterRet);
+        } else {
+          // A non-terminating return.
+          m_pBuilder->CreateBr(HullScope.pPostScopeBB);
+        }
+
+        m_pBuilder->SetInsertPoint(pAfterRet);
+      }
+      break;
+    }
+
+    case D3D10_SB_OPCODE_RETC: {
+      DXASSERT_DXBC(Inst.m_NumOperands == 1);
+      // Find parent scope.
+      Scope &FuncScope = m_ScopeStack.FindParentFunction();
+
+      // Prepare condition.
+      Value *pCond = LoadZNZCondition(Inst, 0);
+
+      if ((FuncScope.IsEntry() && !m_bPatchConstantPhase) || !FuncScope.IsEntry()) {
+        // Create retc and after-retc BB.
+        BasicBlock *pRetc = BasicBlock::Create(m_Ctx, Twine("label") + Twine(FuncScope.LabelIdx) + 
+                                                      Twine(".callc") + Twine(FuncScope.CallIdx) +
+                                                      Twine(".retc") + Twine(FuncScope.ReturnIndex), pFunction);
+        BasicBlock *pAfterRetc = BasicBlock::Create(m_Ctx, Twine("label") + Twine(FuncScope.LabelIdx) + 
+                                                      Twine(".callc") + Twine(FuncScope.CallIdx) +
+                                                      Twine(".afterretc") + Twine(FuncScope.ReturnIndex++), pFunction);
+
+        // Create cbranch for retc.
+        m_pBuilder->CreateCondBr(pCond, pRetc, pAfterRetc);
+
+        // Emit return.
+        m_pBuilder->SetInsertPoint(pRetc);
+        m_pBuilder->CreateRetVoid();
+        m_pBuilder->SetInsertPoint(pAfterRetc);
+      } else {
+        // Hull shader control point phase fork/join.
+        Scope &HullScope = m_ScopeStack.FindParentHullLoop();
+
+        // Create HullLoopBreak and AfterHullLoopBreak BB.
+        BasicBlock *pAfterHullBreakc = BasicBlock::Create(m_Ctx, Twine("hullloop") + Twine(FuncScope.NameIndex) +
+                                              Twine(".retc") + Twine(FuncScope.HullLoopBreakIndex) +
+                                              Twine(".afterretc"), pFunction);
+
+        // Create cbranch for retc (HullLoopBreak).
+        m_pBuilder->CreateCondBr(pCond, HullScope.pPostScopeBB, pAfterHullBreakc);
+        m_pBuilder->SetInsertPoint(pAfterHullBreakc);
+      }
+
+      break;
+    }
+
+    case D3D11_SB_OPCODE_HS_CONTROL_POINT_PHASE:
+      IFTBOOL(m_ScopeStack.FindParentFunction().IsEntry(), E_FAIL);
+      m_bControlPointPhase = true;
+      break;
+
+    case D3D11_SB_OPCODE_HS_FORK_PHASE:
+    case D3D11_SB_OPCODE_HS_JOIN_PHASE: {
+      if (!m_bPatchConstantPhase) {
+        if (!m_bControlPointPhase) {
+          // This is a pass-through CP HS.
+          bPasshThroughCP = true;
+        }
+        m_bControlPointPhase = false;
+        m_bPatchConstantPhase = true;
+
+        // Start patch constant function.
+        (void)m_ScopeStack.Push(Scope::Function, nullptr);
+        m_ScopeStack.Top().SetEntry(true);
+        pFunction = Function::Create(pEntryFuncType, GlobalValue::LinkageTypes::ExternalLinkage, 
+                                     "pc_main", m_pModule.get());
+        pFunction->setCallingConv(CallingConv::C);
+        m_pPR->SetPatchConstantFunction(pFunction);
+        BasicBlock *pBB = BasicBlock::Create(m_Ctx, "entry", pFunction);
+        m_pBuilder->SetInsertPoint(pBB);
+
+        // Swap active x-registers.
+        m_IndexableRegs.swap(m_PatchConstantIndexableRegs);
+
+        DeclareIndexableRegisters();
+
+        // Create HullLoop induction variable.
+        pHullLoopInductionVar = m_pBuilder->CreateAlloca(Type::getInt32Ty(m_Ctx), nullptr, "InstanceID");
+
+        InsertSM50ResourceHandles();
+      }
+
+      // Create HullLoop-scope.
+      Scope &Scope = m_ScopeStack.Push(Scope::HullLoop, m_pBuilder->GetInsertBlock());
+
+      // Initialize HullLoop induction variable.
+      Scope.pInductionVar = pHullLoopInductionVar;
+      m_pBuilder->CreateStore(m_pOP->GetI32Const(0), Scope.pInductionVar);
+
+      Scope.HullLoopTripCount = m_PatchConstantPhaseInstanceCounts[ForkJoinPhaseIndex];
+      ForkJoinPhaseIndex++;
+
+      // Create HullLoop and EndHullLoop BBs.
+      Scope.pHullLoopBB = BasicBlock::Create(m_Ctx, Twine("hullloop") + Twine(Scope.NameIndex), pFunction);
+      Scope.pPostScopeBB = BasicBlock::Create(m_Ctx, Twine("hullloop") + Twine(Scope.NameIndex) + Twine(".end"));
+
+      // Insert branch to Loop BB.
+      m_pBuilder->CreateBr(Scope.pLoopBB);
+
+      // Set Loop BB as active.
+      m_pBuilder->SetInsertPoint(Scope.pLoopBB);
+      break;
+    }
+
+    case D3D11_SB_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
+    case D3D11_SB_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT:
+      break;
+
+    //
+    // Pixel shader.
+    //
+    case D3D10_1_SB_OPCODE_LOD: {
+      OP::OpCode OpCode = OP::OpCode::CalculateLOD;
+      const unsigned uOpOutput = 0;
+      const unsigned uOpCoord = uOpOutput + 1;
+      const unsigned uOpSRV = uOpCoord + 1;
+      const unsigned uOpSampler = uOpSRV + 1;
+      DXASSERT_DXBC(Inst.m_Operands[uOpSRV].m_Type == D3D10_SB_OPERAND_TYPE_RESOURCE);
+
+      OperandValue InCoord, InSRV, InSampler;
+      // Resource.
+      const DxilResource &R = LoadSRVOperand(InSRV, Inst, uOpSRV, CMask::MakeXMask(), CompType::getInvalid());
+      // Coordinates.
+      CMask CoordMask = CMask::MakeFirstNCompMask(DXBC::GetNumResOffsets(R.GetKind()));
+      LoadOperand(InCoord, Inst, uOpCoord, CoordMask, CompType::getF32());
+      // Sampler.
+      LoadOperand(InSampler, Inst, uOpSampler, CMask::MakeXMask(), CompType::getInvalid());
+
+      // Create CalculateLOD call.
+      Value *Args[7];
+      Args[0] = m_pOP->GetU32Const((unsigned)OpCode);   // OpCode
+      Args[1] = InSRV[0];                               // Resource handle
+      Args[2] = InSampler[0];                           // Sampler handle
+      Args[3] = CoordMask.IsSet(0) ? InCoord[0] : m_pUnusedF32;
+      Args[4] = CoordMask.IsSet(1) ? InCoord[1] : m_pUnusedF32;
+      Args[5] = CoordMask.IsSet(2) ? InCoord[2] : m_pUnusedF32;
+
+      CompType DstType = DXBC::GetCompTypeWithMinPrec(CompType::getF32(), Inst.m_Operands[uOpOutput].m_MinPrecision);
+      Type *pDstType = DstType.GetLLVMType(m_Ctx);
+      Function *F = m_pOP->GetOpFunc(OpCode, pDstType);
+
+      // Create unclamped CalculateLOD.
+      Args[6] = m_pOP->GetI1Const(false);                // Unclamped
+      Value *pOpRetUnclamped = m_pBuilder->CreateCall(F, Args);
+      // Create clamped CalculateLOD.
+      Args[6] = m_pOP->GetI1Const(true);                 // Clamped
+      Value *pOpRetClamped = m_pBuilder->CreateCall(F, Args);
+
+      CMask OutputMask = CMask::FromDXBC(Inst.m_Operands[uOpOutput].m_WriteMask);
+      OperandValue Out;
+      for (BYTE c = 0; c < DXBC::kWidth; c++) {
+        if (!OutputMask.IsSet(c)) continue;
+
+        // Respect swizzle: resource swizzle == return value swizzle.
+        BYTE Comp = Inst.m_Operands[uOpSRV].m_Swizzle[c];
+
+        switch (Comp) {
+        case 0: Out[c] = pOpRetClamped; break;
+        case 1: Out[c] = pOpRetUnclamped; break;
+        case 2: __fallthrough;
+        case 3: Out[c] = m_pOP->GetFloatConst(0.f); break;
+        default: DXASSERT_DXBC(false);
+        }
+      }
+      StoreOperand(Out, Inst, uOpOutput, OutputMask, DstType);
+
+      break;
+    }
+
+    case D3D10_SB_OPCODE_DISCARD: {
+      OP::OpCode OpCode = OP::OpCode::Discard;
+
+      Value *Args[2];
+      Args[0] = m_pOP->GetU32Const((unsigned)OpCode);   // OpCode
+      Args[1] = LoadZNZCondition(Inst, 0);              // Condition
+
+      Function *F = m_pOP->GetOpFunc(OpCode, Type::getVoidTy(m_Ctx));
+      MarkPrecise(m_pBuilder->CreateCall(F, Args));
+      break;
+    }
+
+    case D3D10_SB_OPCODE_DERIV_RTX:         __fallthrough;
+    case D3D11_SB_OPCODE_DERIV_RTX_COARSE:  ConvertUnary(OP::OpCode::DerivCoarseX, CompType::getF32(), Inst); break;
+    case D3D10_SB_OPCODE_DERIV_RTY:         __fallthrough;
+    case D3D11_SB_OPCODE_DERIV_RTY_COARSE:  ConvertUnary(OP::OpCode::DerivCoarseY, CompType::getF32(), Inst); break;
+    case D3D11_SB_OPCODE_DERIV_RTX_FINE:    ConvertUnary(OP::OpCode::DerivFineX, CompType::getF32(), Inst); break;
+    case D3D11_SB_OPCODE_DERIV_RTY_FINE:    ConvertUnary(OP::OpCode::DerivFineY, CompType::getF32(), Inst); break;
+
+    case D3D11_SB_OPCODE_EVAL_SNAPPED: {
+      OP::OpCode OpCode = OP::OpCode::EvalSnapped;
+      const unsigned uOpOutput = 0;
+      const unsigned uOpInput = uOpOutput + 1;
+      const unsigned uOpOffset = uOpInput + 1;
+
+      OperandValue InOffset;
+      CMask OutputMask = CMask::FromDXBC(Inst.m_Operands[uOpOutput].m_WriteMask);
+      LoadOperand(InOffset, Inst, uOpOffset, CMask::MakeFirstNCompMask(2), CompType::getI32());
+      const D3D10ShaderBinary::COperandBase &OpInput = Inst.m_Operands[uOpInput];
+      DXASSERT_NOMSG(Inst.m_Operands[uOpInput].m_IndexDimension == D3D10_SB_OPERAND_INDEX_1D);
+      unsigned Register = OpInput.m_Index[0].m_RegIndex;
+      Value *pRowIndexValue = LoadOperandIndex(OpInput.m_Index[0], OpInput.m_IndexType[0]);
+
+      CompType DstType = DXBC::GetCompTypeWithMinPrec(CompType::getF32(), Inst.m_Operands[uOpOutput].m_MinPrecision);
+      Type *pDstType = DstType.GetLLVMType(m_Ctx);
+      Function *F = m_pOP->GetOpFunc(OpCode, pDstType);
+
+      Value *Args[6];
+      Args[0] = m_pOP->GetU32Const((unsigned)OpCode);         // OpCode
+      Args[4] = InOffset[0];                                  // Offset X
+      Args[5] = InOffset[1];                                  // Offset Y
+
+      OperandValue Out;
+      for (BYTE c = 0; c < DXBC::kWidth; c++) {
+        if (!OutputMask.IsSet(c)) continue;
+
+        BYTE Comp = OpInput.m_Swizzle[c];
+        // Retrieve signature element.
+        const DxilSignatureElement *E = m_pInputSignature->GetElement(Register, Comp);
+
+        // Make row/col index relative within element.
+        Value *pRowIndexValueRel = m_pBuilder->CreateSub(pRowIndexValue, m_pOP->GetU32Const(E->GetStartRow()));
+
+        Args[1] = m_pOP->GetU32Const(E->GetID());             // Input signature element ID
+        Args[2] = pRowIndexValueRel;                          // Row, relative to the element
+        Args[3] = m_pOP->GetU8Const(Comp - E->GetStartCol()); // Col, relative to the element
+
+        Out[c] = m_pBuilder->CreateCall(F, Args);
+      }
+      StoreOperand(Out, Inst, uOpOutput, OutputMask, DstType);
+
+      break;
+    }
+
+    case D3D11_SB_OPCODE_EVAL_SAMPLE_INDEX: {
+      OP::OpCode OpCode = OP::OpCode::EvalSampleIndex;
+      const unsigned uOpOutput = 0;
+      const unsigned uOpInput = uOpOutput + 1;
+      const unsigned uOpSampleIndex = uOpInput + 1;
+
+      CMask OutputMask = CMask::FromDXBC(Inst.m_Operands[uOpOutput].m_WriteMask);
+      OperandValue InSampleIndex;
+      LoadOperand(InSampleIndex, Inst, uOpSampleIndex, CMask::MakeXMask(), CompType::getI32());
+      const D3D10ShaderBinary::COperandBase &OpInput = Inst.m_Operands[uOpInput];
+      DXASSERT_NOMSG(Inst.m_Operands[uOpInput].m_IndexDimension == D3D10_SB_OPERAND_INDEX_1D);
+      unsigned Register = OpInput.m_Index[0].m_RegIndex;
+      Value *pRowIndexValue = LoadOperandIndex(OpInput.m_Index[0], OpInput.m_IndexType[0]);
+
+      CompType DstType = DXBC::GetCompTypeWithMinPrec(CompType::getF32(), Inst.m_Operands[uOpOutput].m_MinPrecision);
+      Type *pDstType = DstType.GetLLVMType(m_Ctx);
+      Function *F = m_pOP->GetOpFunc(OpCode, pDstType);
+
+      Value *Args[5];
+      Args[0] = m_pOP->GetU32Const((unsigned)OpCode);         // OpCode
+      Args[4] = InSampleIndex[0];                             // Sample index
+
+      OperandValue Out;
+      for (BYTE c = 0; c < DXBC::kWidth; c++) {
+        if (!OutputMask.IsSet(c)) continue;
+
+        BYTE Comp = OpInput.m_Swizzle[c];
+        // Retrieve signature element.
+        const DxilSignatureElement *E = m_pInputSignature->GetElement(Register, Comp);
+
+        // Make row/col index relative within element.
+        Value *pRowIndexValueRel = m_pBuilder->CreateSub(pRowIndexValue, m_pOP->GetU32Const(E->GetStartRow()));
+
+        Args[1] = m_pOP->GetU32Const(E->GetID());             // Input signature element ID
+        Args[2] = pRowIndexValueRel;                          // Row, relative to the element
+        Args[3] = m_pOP->GetU8Const(Comp - E->GetStartCol()); // Col, relative to the element
+
+        Out[c] = m_pBuilder->CreateCall(F, Args);
+      }
+      StoreOperand(Out, Inst, uOpOutput, OutputMask, DstType);
+
+      break;
+    }
+
+    case D3D11_SB_OPCODE_EVAL_CENTROID: {
+      OP::OpCode OpCode = OP::OpCode::EvalCentroid;
+      const unsigned uOpOutput = 0;
+      const unsigned uOpInput = uOpOutput + 1;
+
+      CMask OutputMask = CMask::FromDXBC(Inst.m_Operands[uOpOutput].m_WriteMask);
+      const D3D10ShaderBinary::COperandBase &OpInput = Inst.m_Operands[uOpInput];
+      DXASSERT_NOMSG(Inst.m_Operands[uOpInput].m_IndexDimension == D3D10_SB_OPERAND_INDEX_1D);
+      unsigned Register = OpInput.m_Index[0].m_RegIndex;
+      Value *pRowIndexValue = LoadOperandIndex(OpInput.m_Index[0], OpInput.m_IndexType[0]);
+
+      CompType DstType = DXBC::GetCompTypeWithMinPrec(CompType::getF32(), Inst.m_Operands[uOpOutput].m_MinPrecision);
+      Type *pDstType = DstType.GetLLVMType(m_Ctx);
+      Function *F = m_pOP->GetOpFunc(OpCode, pDstType);
+
+      Value *Args[4];
+      Args[0] = m_pOP->GetU32Const((unsigned)OpCode);   // OpCode
+
+      OperandValue Out;
+      for (BYTE c = 0; c < DXBC::kWidth; c++) {
+        if (!OutputMask.IsSet(c)) continue;
+
+        BYTE Comp = OpInput.m_Swizzle[c];
+        // Retrieve signature element.
+        const DxilSignatureElement *E = m_pInputSignature->GetElement(Register, Comp);
+
+        // Make row/col index relative within element.
+        Value *pRowIndexValueRel = m_pBuilder->CreateSub(pRowIndexValue, m_pOP->GetU32Const(E->GetStartRow()));
+
+        Args[1] = m_pOP->GetU32Const(E->GetID());             // Input signature element ID
+        Args[2] = pRowIndexValueRel;                          // Row, relative to the element
+        Args[3] = m_pOP->GetU8Const(Comp - E->GetStartCol()); // Col, relative to the element
+
+        Out[c] = m_pBuilder->CreateCall(F, Args);
+      }
+      StoreOperand(Out, Inst, uOpOutput, OutputMask, DstType);
+
+      break;
+    }
+
+    case D3D10_SB_OPCODE_EMIT:
+    case D3D11_SB_OPCODE_EMIT_STREAM: {
+      OP::OpCode OpCode = OP::OpCode::EmitStream;
+      BYTE StreamId = 0;
+
+      if (Inst.OpCode() == D3D11_SB_OPCODE_EMIT_STREAM) {
+        StreamId = (BYTE)Inst.m_Operands[0].m_Index[0].m_RegIndex;
+      }
+
+      // For GS with multiple streams, capture the values of output registers at the emit point.
+      if (m_pPR->HasMultipleOutputStreams()) {
+        EmitGSOutputRegisterStore(StreamId);
+      }
+
+      // Create EmitStream call.
+      Value *Args[2];
+      Args[0] = m_pOP->GetU32Const((unsigned)OpCode);   // OpCode
+      Args[1] = m_pOP->GetU8Const(StreamId);  // Stream ID
+
+      Function *F = m_pOP->GetOpFunc(OpCode, Type::getVoidTy(m_Ctx));
+      MarkPrecise(m_pBuilder->CreateCall(F, Args));
+
+      break;
+    }
+
+    case D3D10_SB_OPCODE_CUT:
+    case D3D11_SB_OPCODE_CUT_STREAM: {
+      OP::OpCode OpCode = OP::OpCode::CutStream;
+      BYTE StreamId = 0;
+
+      if (Inst.OpCode() == D3D11_SB_OPCODE_CUT_STREAM) {
+        StreamId = (BYTE)Inst.m_Operands[0].m_Index[0].m_RegIndex;
+      }
+
+      // Create CutStream call.
+      Value *Args[2];
+      Args[0] = m_pOP->GetU32Const((unsigned)OpCode);   // OpCode
+      Args[1] = m_pOP->GetU8Const(StreamId);  // Stream ID
+
+      Function *F = m_pOP->GetOpFunc(OpCode, Type::getVoidTy(m_Ctx));
+      MarkPrecise(m_pBuilder->CreateCall(F, Args));
+
+      break;
+    }
+
+    case D3D10_SB_OPCODE_EMITTHENCUT:
+    case D3D11_SB_OPCODE_EMITTHENCUT_STREAM: {
+      OP::OpCode OpCode = OP::OpCode::EmitThenCutStream;
+      BYTE StreamId = 0;
+
+      if (Inst.OpCode() == D3D11_SB_OPCODE_EMITTHENCUT_STREAM) {
+        StreamId = (BYTE)Inst.m_Operands[0].m_Index[0].m_RegIndex;
+      }
+
+      // For GS with multiple streams, capture the values of output registers at the emit point.
+      if (m_pPR->HasMultipleOutputStreams()) {
+        EmitGSOutputRegisterStore(StreamId);
+      }
+
+      // Create EmitThenCutStream call.
+      Value *Args[2];
+      Args[0] = m_pOP->GetU32Const((unsigned)OpCode);   // OpCode
+      Args[1] = m_pOP->GetU8Const(StreamId);  // Stream ID
+
+      Function *F = m_pOP->GetOpFunc(OpCode, Type::getVoidTy(m_Ctx));
+      MarkPrecise(m_pBuilder->CreateCall(F, Args));
+
+      break;
+    }
+
+    case D3D10_SB_OPCODE_NOP:
+      break;
+
+    default:
+      HandleUnknownInstruction(Inst);
+      break;
+    }
+  }
+
+  DXASSERT_NOMSG(m_ScopeStack.IsEmpty());
+
+  if (bPasshThroughCP) {
+    Function *Entry = m_pPR->GetEntryFunction();
+    m_pPR->SetEntryFunction(nullptr);
+    Entry->eraseFromParent();
+    m_pPR->SetEntryFunctionName("");
+  }
+
+  CleanupIndexableRegisterDecls(m_IndexableRegs);
+  CleanupIndexableRegisterDecls(m_PatchConstantIndexableRegs);
+
+  RemoveUnreachableBasicBlocks();
+  CleanupGEP();
+}
+
+// Hook for reporting the outcome of a DXBC conversion. This base implementation
+// does nothing; per the note in the body it exists to be overridden.
+// The SAL annotations describe the buffers: pDxbc is readable for DxbcSize
+// bytes, pConverted is readable for ConvertedSize bytes, and pExtraOptions is
+// an optional null-terminated wide string.
+// NOTE(review): pQPCConvertStart/pQPCConvertEnd look like performance-counter
+// timestamps taken around the conversion, and hr like its result code --
+// confirm against the caller.
+void DxbcConverter::LogConvertResult(bool InDriver, _In_ const LARGE_INTEGER *pQPCConvertStart,
+  _In_ const LARGE_INTEGER *pQPCConvertEnd, _In_reads_bytes_(DxbcSize) LPCVOID pDxbc, _In_ UINT32 DxbcSize,
+  _In_opt_z_ LPCWSTR pExtraOptions, _In_reads_bytes_(ConvertedSize) LPCVOID pConverted, _In_opt_ UINT32 ConvertedSize,
+  HRESULT hr) {
+  // intentionally empty - override to report conversion results
+}
+
+// Fallback used by the instruction-conversion switch for opcodes it has no case
+// for: raises an assertion whose message carries the numeric opcode.
+// NOTE(review): what happens in builds where DXASSERT_ARGS is compiled out depends
+// on that macro's definition - confirm.
+void DxbcConverter::HandleUnknownInstruction(D3D10ShaderBinary::CInstruction &Inst) {
+    DXASSERT_ARGS(false, "OpCode %u is not yet implemented", Inst.OpCode());
+}
+
+// Returns the resource slot for Inst by forwarding the instruction's opcode to
+// DXBC::GetResourceSlot.
+unsigned DxbcConverter::GetResourceSlot(D3D10ShaderBinary::CInstruction &Inst) {
+    return DXBC::GetResourceSlot(Inst.OpCode());
+}
+
+// Moves the instruction stream forward by one step.
+//   - If bDoneParsing is already set, nothing happens.
+//   - If the parser is at the end of the shader, bDoneParsing is set.
+//   - Otherwise the next instruction is parsed into Inst.
+void DxbcConverter::AdvanceDxbcInstructionStream(D3D10ShaderBinary::CShaderCodeParser &Parser,
+                                                 D3D10ShaderBinary::CInstruction &Inst, 
+                                                 bool &bDoneParsing) {
+  if (bDoneParsing)
+    return;
+
+  // From here on bDoneParsing is known to be false, so no further check of it
+  // is needed on either path.
+  if (Parser.EndOfShader()) {
+    bDoneParsing = true;
+    return;
+  }
+
+  Parser.ParseInstruction(&Inst);
+}
+
+// Peeks at the instruction at the parser's current position without consuming it.
+// Returns false (leaving NextInst untouched) when the parser is at the end of the
+// shader; otherwise fills NextInst and returns true.
+bool DxbcConverter::GetNextDxbcInstruction(D3D10ShaderBinary::CShaderCodeParser &Parser,
+                                           D3D10ShaderBinary::CInstruction &NextInst) {
+  const bool bHasNext = !Parser.EndOfShader();
+
+  if (bHasNext) {
+    // Parse ahead, then rewind so the parser's token offset is unchanged.
+    const UINT SavedOffset = Parser.CurrentTokenOffset();
+    Parser.ParseInstruction(&NextInst);
+    Parser.SetCurrentTokenOffset(SavedOffset);
+  }
+
+  return bHasNext;
+}
+
+// For shader models below 5.1, creates one handle per declared resource up front
+// and stores it on the resource record. This reduces the number of call
+// instructions (and therefore IR size).
+// Later: it may be worthwhile to implement a pass to hoist handle creation for
+// SM5.1 here when the index into range is constant and used more than once
+// within the shader.
+void DxbcConverter::InsertSM50ResourceHandles() {
+  if (IsSM51Plus())
+    return;
+
+  // SRVs: only those in space 0 get a handle here.
+  for (size_t SrvIdx = 0; SrvIdx < m_pPR->GetSRVs().size(); ++SrvIdx) {
+    DxilResource &Srv = m_pPR->GetSRV(SrvIdx);
+    if (Srv.GetSpaceID() != 0)
+      continue;
+    Srv.SetHandle(CreateHandle(Srv.GetClass(), Srv.GetID(), m_pOP->GetU32Const(Srv.GetLowerBound()), false));
+  }
+
+  // UAVs: every UAV gets a handle; all of them are expected to be in space 0.
+  for (size_t UavIdx = 0; UavIdx < m_pPR->GetUAVs().size(); ++UavIdx) {
+    DxilResource &Uav = m_pPR->GetUAV(UavIdx);
+    DXASSERT(Uav.GetSpaceID() == 0, "In SM5.0, all UAVs should be in space 0");
+    Uav.SetHandle(CreateHandle(Uav.GetClass(), Uav.GetID(), m_pOP->GetU32Const(Uav.GetLowerBound()), false));
+  }
+
+  // Constant buffers: only those in space 0.
+  for (size_t CbIdx = 0; CbIdx < m_pPR->GetCBuffers().size(); ++CbIdx) {
+    DxilCBuffer &Cb = m_pPR->GetCBuffer(CbIdx);
+    if (Cb.GetSpaceID() != 0)
+      continue;
+    Cb.SetHandle(CreateHandle(Cb.GetClass(), Cb.GetID(), m_pOP->GetU32Const(Cb.GetLowerBound()), false));
+  }
+
+  // Samplers: only those in space 0.
+  for (size_t SampIdx = 0; SampIdx < m_pPR->GetSamplers().size(); ++SampIdx) {
+    DxilSampler &Samp = m_pPR->GetSampler(SampIdx);
+    if (Samp.GetSpaceID() != 0)
+      continue;
+    Samp.SetHandle(CreateHandle(Samp.GetClass(), Samp.GetID(), m_pOP->GetU32Const(Samp.GetLowerBound()), false));
+  }
+}
+
+// Declares the resources used to support interfaces. The work is done once:
+// the function returns immediately if m_pInterfaceDataBuffer is already set.
+// As a side effect the "all resources bound" shader flag is cleared.
+void DxbcConverter::InsertInterfacesResourceDecls() {
+  // Insert decls for:
+  // 1. CB14 containing interface table selections, along with "this pointer" information,
+  // 2. 14 CBs in space 1,
+  // 3. 32 samplers in space 1 and 32 comparison samplers in space 2
+  // SRVs will be inserted dynamically as needed
+  if (m_pInterfaceDataBuffer) {
+    return;
+  }
+
+  m_pPR->m_ShaderFlags.SetAllResourcesBound(false);
+
+  // Create interface data buffer
+  {
+    unsigned ID = m_pPR->AddCBuffer(unique_ptr<DxilCBuffer>(new DxilCBuffer));
+    DxilCBuffer &R = m_pPR->GetCBuffer(ID);  // R == record
+    m_pInterfaceDataBuffer = &R;
+    R.SetID(ID);
+    // Root signature bindings.
+    unsigned CBufferSize = D3D11_SHADER_MAX_INTERFACES * 8 /*UINTs per interface*/ * sizeof(UINT);
+    R.SetLowerBound(D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT); // 14
+    R.SetRangeSize(1);
+    R.SetSpaceID(0);
+    // Declare global variable.
+    R.SetGlobalName(SynthesizeResGVName("CB", R.GetID()));
+    StructType *pResType = GetStructResElemType(CBufferSize);
+    R.SetGlobalSymbol(DeclareUndefPtr(pResType, DXIL::kCBufferAddrSpace));
+    // No handle is created here; the handle field is left null.
+    R.SetHandle(nullptr);
+
+    // CBuffer-specific state.
+    R.SetSize(CBufferSize);
+  }
+
+  // Create CB array for class instances
+  {
+    unsigned ID = m_pPR->AddCBuffer(unique_ptr<DxilCBuffer>(new DxilCBuffer));
+    DxilCBuffer &R = m_pPR->GetCBuffer(ID);  // R == record
+    m_pClassInstanceCBuffers = &R;
+    R.SetID(ID);
+    // Root signature bindings.
+    unsigned CBufferSize = DXIL::kMaxCBufferSize * DXBC::kWidth * 4;
+    R.SetLowerBound(0);
+    R.SetRangeSize(D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT); // 14
+    R.SetSpaceID(1);
+    // Declare global variable.
+    R.SetGlobalName(SynthesizeResGVName("CB", R.GetID()));
+    StructType *pResType = GetStructResElemType(CBufferSize);
+    R.SetGlobalSymbol(DeclareUndefPtr(pResType, DXIL::kCBufferAddrSpace));
+    R.SetHandle(nullptr);
+
+    // CBuffer-specific state.
+    R.SetSize(CBufferSize);
+  }
+
+  // Create sampler arrays for class instances
+  // Iteration 0 declares the default samplers (space 1), iteration 1 the
+  // comparison samplers (space 2).
+  for (unsigned i = 0; i < 2; ++i) {
+    unsigned ID = m_pPR->AddSampler(unique_ptr<DxilSampler>(new DxilSampler));
+    DxilSampler &R = m_pPR->GetSampler(ID);  // R == record
+    R.SetID(ID);
+    // Root signature bindings.
+    R.SetLowerBound(0);
+    R.SetRangeSize(D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT);
+    R.SetSpaceID(i + 1);
+    // Declare global variable.
+    R.SetGlobalName(SynthesizeResGVName("S", R.GetID()));
+    // Reuse the module's sampler struct type if it already exists; otherwise create it.
+    string ResTypeName("dx.types.Sampler");
+    StructType *pResType = m_pModule->getTypeByName(ResTypeName);
+    if (pResType == nullptr) {
+      pResType = StructType::create(m_Ctx, ResTypeName);
+    }
+    R.SetGlobalSymbol(DeclareUndefPtr(pResType, DXIL::kDeviceMemoryAddrSpace));
+    R.SetHandle(nullptr);
+
+    // Sampler-specific state.
+    R.SetSamplerKind(i == 0 ? DXIL::SamplerKind::Default : DXIL::SamplerKind::Comparison);
+    // Remember the record in the member that matches this iteration's sampler kind.
+    DxilSampler*& pSampler = (i == 0 ? m_pClassInstanceSamplers : m_pClassInstanceComparisonSamplers);
+    pSampler = &R;
+  }
+}
+
+// Returns the SRV declaration used for class-instance resources that match the
+// extended resource information carried by Inst. Declarations are cached in
+// m_ClassInstanceSRVs, keyed by resource kind plus either the structure stride
+// (structured buffers) or the return component type (typed resources); a new SRV
+// is declared on a cache miss.
+const DxilResource& DxbcConverter::GetInterfacesSRVDecl(D3D10ShaderBinary::CInstruction &Inst) {
+  InterfaceShaderResourceKey Key = {};
+  DXASSERT_DXBC(Inst.m_ExtendedOpCodeCount == 2); // Extended resource dimension and return type
+  Key.Kind = DXBC::GetResourceKind(Inst.m_ResourceDimEx);
+  if (Inst.m_ResourceDimEx == D3D11_SB_RESOURCE_DIMENSION_STRUCTURED_BUFFER) {
+    Key.StructureByteStride = Inst.m_ResourceDimStructureStrideEx;
+  }
+  else if (Inst.m_ResourceDimEx != D3D11_SB_RESOURCE_DIMENSION_RAW_BUFFER) {
+    Key.TypedSRVRet = DXBC::GetDeclResCompType(Inst.m_ResourceReturnTypeEx[0]).GetKind();
+  }
+  // Cache hit: reuse the previously declared SRV.
+  auto iter = m_ClassInstanceSRVs.find(Key);
+  if (iter != m_ClassInstanceSRVs.end()) {
+    return m_pPR->GetSRV(iter->second);
+  }
+
+  // Cache miss: declare a new SRV range.
+  unsigned ID = m_pPR->AddSRV(unique_ptr<DxilResource>(new DxilResource));
+  DxilResource &R = m_pPR->GetSRV(ID);  // R == record
+  R.SetID(ID);
+  R.SetRW(false);
+  // Root signature bindings.
+  R.SetLowerBound(0);
+  R.SetRangeSize(D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT);
+  R.SetHandle(nullptr);
+  // Each distinct key gets its own space: 1 for the first entry, 2 for the next, ...
+  R.SetSpaceID(m_ClassInstanceSRVs.size() + 1);
+
+  unsigned SampleCount =
+      (Key.Kind == DXIL::ResourceKind::Texture2DMS ||
+       Key.Kind == DXIL::ResourceKind::Texture2DMSArray) ? 4 : 0;
+  DXASSERT_DXBC(SampleCount == 0); // Don't expect to actually see this used within interfaces...
+
+  // Resource-specific state.
+  StructType *pResType = nullptr;
+  switch (Inst.m_ResourceDimEx) {
+  default: {
+    // Typed resource: kind comes from the dimension, element type from the
+    // declared return type (falling back to U32 when that type is invalid).
+    R.SetKind(DXBC::GetResourceKind(Inst.m_ResourceDimEx));
+    const unsigned kTypedBufferElementSizeInBytes = 4;
+    R.SetElementStride(kTypedBufferElementSizeInBytes);
+    R.SetSampleCount(SampleCount);
+    CompType DeclCT = DXBC::GetDeclResCompType(Inst.m_ResourceReturnTypeEx[0]);
+    if (DeclCT.IsInvalid()) DeclCT = CompType::getU32();
+    R.SetCompType(DeclCT);
+    pResType = GetTypedResElemType(DeclCT);
+    break;
+  }
+  case D3D11_SB_RESOURCE_DIMENSION_RAW_BUFFER: {
+    R.SetKind(DxilResource::Kind::RawBuffer);
+    const unsigned kRawBufferElementSizeInBytes = 1;
+    R.SetElementStride(kRawBufferElementSizeInBytes);
+    pResType = GetTypedResElemType(CompType::getU32());
+    break;
+  }
+  case D3D11_SB_RESOURCE_DIMENSION_STRUCTURED_BUFFER: {
+    R.SetKind(DxilResource::Kind::StructuredBuffer);
+    unsigned Stride = Inst.m_ResourceDimStructureStrideEx;
+    R.SetElementStride(Stride);
+    pResType = GetStructResElemType(Stride);
+    break;
+  }
+  }
+
+  // Declare global variable.
+  R.SetGlobalName(SynthesizeResGVName("T", R.GetID()));
+  R.SetGlobalSymbol(DeclareUndefPtr(pResType, DXIL::kDeviceMemoryAddrSpace));
+  // Record the new declaration so later lookups with the same key reuse it.
+  m_ClassInstanceSRVs[Key] = ID;
+  return R;
+}
+
+// Reserves storage for the indexable (x) registers listed in m_IndexableRegs.
+// Every register gets two arrays of NumRegs * NumComps elements: one of float
+// ("dx.v32.x<N>") and one of half ("dx.v16.x<N>").
+//   - Shader without labels (only the main subroutine): allocas, as an optimization.
+//   - Shader with labels (several subroutines): internal global variables.
+void DxbcConverter::DeclareIndexableRegisters() {
+  const bool bMainOnly = !HasLabels();
+
+  for (auto &Entry : m_IndexableRegs) {
+    auto &Reg = Entry.second;
+    const auto NumElems = Reg.NumRegs * Reg.NumComps;
+
+    if (bMainOnly) {
+      DXASSERT_NOMSG(Reg.pValue32 == nullptr && Reg.pValue16 == nullptr);
+
+      // 32-bit view.
+      Type *pFloatArrayTy = ArrayType::get(Type::getFloatTy(m_Ctx), NumElems);
+      AllocaInst *pFloatSlot = m_pBuilder->CreateAlloca(pFloatArrayTy, nullptr, Twine("dx.v32.x") + Twine(Entry.first));
+      pFloatSlot->setAlignment(kRegCompAlignment);
+      Reg.pValue32 = pFloatSlot;
+
+      // 16-bit view.
+      Type *pHalfArrayTy = ArrayType::get(Type::getHalfTy(m_Ctx), NumElems);
+      AllocaInst *pHalfSlot = m_pBuilder->CreateAlloca(pHalfArrayTy, nullptr, Twine("dx.v16.x") + Twine(Entry.first));
+      pHalfSlot->setAlignment(kRegCompAlignment);
+      Reg.pValue16 = pHalfSlot;
+
+      Reg.bIsAlloca = true;
+    } else {
+      // 32-bit view.
+      Type *pFloatArrayTy = ArrayType::get(Type::getFloatTy(m_Ctx), NumElems);
+      Reg.pValue32 = new GlobalVariable(*m_pModule, pFloatArrayTy,
+                                        false, GlobalValue::InternalLinkage,
+                                        UndefValue::get(pFloatArrayTy),
+                                        Twine("dx.v32.x") + Twine(Entry.first), nullptr,
+                                        GlobalVariable::NotThreadLocal, DXIL::kDefaultAddrSpace);
+
+      // 16-bit view.
+      Type *pHalfArrayTy = ArrayType::get(Type::getHalfTy(m_Ctx), NumElems);
+      Reg.pValue16 = new GlobalVariable(*m_pModule, pHalfArrayTy,
+                                        false, GlobalValue::InternalLinkage,
+                                        UndefValue::get(pHalfArrayTy),
+                                        Twine("dx.v16.x") + Twine(Entry.first), nullptr,
+                                        GlobalVariable::NotThreadLocal, DXIL::kDefaultAddrSpace);
+
+      Reg.bIsAlloca = false;
+    }
+  }
+}
+
+// Deletes the storage of indexable registers that ended up with no uses.
+// For every entry of IdxRegMap, the 32-bit and 16-bit storage values are checked
+// independently; an unused one is erased from its parent (as an instruction when
+// the entry is flagged bIsAlloca, as a global variable otherwise).
+// The corresponding map field is reset to nullptr after erasure so the map never
+// holds a pointer to a deleted value.
+void DxbcConverter::CleanupIndexableRegisterDecls(map<unsigned, IndexableReg> &IdxRegMap) {
+  for (auto &IR : IdxRegMap) {
+    const bool bIsAlloca = IR.second.bIsAlloca;
+
+    if (IR.second.pValue32 && IR.second.pValue32->use_empty()) {
+      if (bIsAlloca)
+        cast<Instruction>(IR.second.pValue32)->eraseFromParent();
+      else
+        cast<GlobalVariable>(IR.second.pValue32)->eraseFromParent();
+      IR.second.pValue32 = nullptr; // The value was deleted; do not keep a dangling pointer.
+    }
+
+    if (IR.second.pValue16 && IR.second.pValue16->use_empty()) {
+      if (bIsAlloca)
+        cast<Instruction>(IR.second.pValue16)->eraseFromParent();
+      else
+        cast<GlobalVariable>(IR.second.pValue16)->eraseFromParent();
+      IR.second.pValue16 = nullptr; // The value was deleted; do not keep a dangling pointer.
+    }
+  }
+}
+
+// Removes basic blocks that have no predecessors from every function of the
+// module. The first block of a function is never a candidate. Removing a block
+// can leave its successors without predecessors, so those are removed in turn.
+// Blocks that only reach each other (a cycle with no outside predecessor) still
+// have predecessors and are not removed by this routine.
+void DxbcConverter::RemoveUnreachableBasicBlocks() {
+  for (auto itFn = m_pModule->begin(), endFn = m_pModule->end(); itFn != endFn; ++itFn) {
+    Function *F = itFn;
+
+    // Functions without a body (declarations) have no blocks; skipping them also
+    // avoids advancing begin() past end() below.
+    if (F->empty())
+      continue;
+
+    vector<BasicBlock *> NoPredSet;
+    // 1. Detect basic blocks without predecessors (the entry block is skipped).
+    for (auto itBB = ++(F->begin()), endBB = F->end(); itBB != endBB; ++itBB) {
+      BasicBlock *B = itBB;
+      if (pred_begin(B) == pred_end(B)) {
+        NoPredSet.emplace_back(B);
+      }
+    }
+
+    // 2. Remove BBs with no predecessors.
+    while (!NoPredSet.empty()) {
+      BasicBlock *B = NoPredSet.back();
+      NoPredSet.pop_back();
+
+      // Remember each distinct successor once. A terminator may name the same
+      // block on several edges; queuing it more than once would erase it twice.
+      vector<BasicBlock *> Successors;
+      if (TerminatorInst *pTI = B->getTerminator()) {
+        for (unsigned i = 0, NumSucc = pTI->getNumSuccessors(); i < NumSucc; i++) {
+          BasicBlock *S = pTI->getSuccessor(i);
+          bool bAlreadyListed = false;
+          for (BasicBlock *Known : Successors) {
+            if (Known == S) {
+              bAlreadyListed = true;
+              break;
+            }
+          }
+          if (!bAlreadyListed) {
+            Successors.emplace_back(S);
+          }
+        }
+      }
+
+      B->eraseFromParent();
+
+      // Successors that lost their last predecessor become removal candidates.
+      for (auto S : Successors) {
+        if (pred_begin(S) == pred_end(S)) {
+          NoPredSet.emplace_back(S);
+        }
+      }
+    }
+  }
+}
+
+// Instruction visitor that narrows constant GEP indices to i32.
+// For every operand of every visited instruction that (after stripping pointer
+// casts) is a GEP, any constant integer index that is not already i32 and whose
+// zero-extended value fits in 32 bits is replaced by the equivalent i32 constant.
+// The GEP is rebuilt as a constant expression and all of its uses are redirected.
+class GEPVisitor : public InstVisitor<GEPVisitor> {
+public:
+  void visitInstruction(Instruction &I) {
+    for (Instruction::op_iterator itOp = I.op_begin(), endOp = I.op_end(); itOp != endOp; ++itOp) {
+      GEPOperator *pGEP = dyn_cast<GEPOperator>(itOp->get()->stripPointerCasts());
+      if (pGEP == nullptr)
+        continue;
+
+      // The replacement is a constant expression, so it can only be built when
+      // the base pointer and every index are constants.
+      Constant *pBase = dyn_cast<Constant>(pGEP->getPointerOperand());
+      if (pBase == nullptr)
+        continue;
+
+      bool bReplace = false;
+      bool bAllConstant = true;
+      SmallVector<Value *, 4> GEPIndices;
+
+      for (GEPOperator::op_iterator itIdx = pGEP->idx_begin(), endIdx = pGEP->idx_end(); itIdx != endIdx; ++itIdx) {
+        Value *V = itIdx->get();
+        GEPIndices.push_back(V);
+
+        if (!isa<Constant>(V)) {
+          bAllConstant = false;
+          break;
+        }
+
+        if (ConstantInt *C = dyn_cast<ConstantInt>(V)) {
+          LLVMContext &Ctx = C->getContext();
+
+          if (C->getType() != Type::getInt32Ty(Ctx)) {
+            uint64_t n = C->getZExtValue();
+
+            // Only values representable in 32 bits are narrowed.
+            if (n <= (uint64_t)(UINT32_MAX)) {
+              GEPIndices.back() = Constant::getIntegerValue(IntegerType::get(Ctx, 32), APInt(32, (unsigned)n));
+              bReplace = true;
+            }
+          }
+        }
+      }
+
+      if (bReplace && bAllConstant) {
+        Constant *pGEP2 = ConstantExpr::getGetElementPtr(pGEP->getPointerOperandType()->getPointerElementType(),
+                                                         pBase,
+                                                         GEPIndices);
+        pGEP->replaceAllUsesWith(pGEP2);
+      }
+    }
+  }
+};
+
+// Constant GEP indices may end up typed as i64. Walk the whole module and
+// rewrite them as i32 wherever possible, to avoid 64-bit values in DXIL.
+void DxbcConverter::CleanupGEP() {
+  GEPVisitor IndexNarrower;
+  IndexNarrower.visit(*m_pModule);
+}
+
+// Converts a one-source DXBC instruction into per-component calls of the DXIL
+// operation OpCode (class Unary or UnaryBits).
+//   ElementType - base component type; combined with the destination operand's
+//                 minimum precision to pick the overload.
+//   DstIdx      - index of the destination operand in Inst.
+//   SrcIdx      - index of the source operand in Inst.
+void DxbcConverter::ConvertUnary(OP::OpCode OpCode,
+                                 const CompType &ElementType,
+                                 D3D10ShaderBinary::CInstruction &Inst,
+                                 const unsigned DstIdx,
+                                 const unsigned SrcIdx) {
+  DXASSERT_NOMSG(OP::GetOpCodeClass(OpCode) == OP::OpCodeClass::Unary ||
+                 OP::GetOpCodeClass(OpCode) == OP::OpCodeClass::UnaryBits);
+
+  const CMask DstMask = CMask::FromDXBC(Inst.m_Operands[DstIdx].m_WriteMask);
+  const CompType OpType = DXBC::GetCompTypeWithMinPrec(ElementType, Inst.m_Operands[DstIdx].m_MinPrecision);
+  Function *pOpFunc = m_pOP->GetOpFunc(OpCode, OpType.GetLLVMType(m_Ctx));
+
+  OperandValue Src, Result;
+  LoadOperand(Src, Inst, SrcIdx, DstMask, OpType);
+
+  // One call per component selected by the destination write mask.
+  for (BYTE Comp = 0; Comp < DXBC::kWidth; Comp++) {
+    if (DstMask.IsSet(Comp)) {
+      Result[Comp] = m_pBuilder->CreateCall(pOpFunc, { m_pOP->GetU32Const((unsigned)OpCode), Src[Comp] });
+    }
+  }
+
+  StoreOperand(Result, Inst, DstIdx, DstMask, OpType);
+}
+
+// Converts a two-source DXBC instruction into per-component calls of the DXIL
+// operation OpCode (class Binary).
+//   ElementType      - base component type; combined with the destination
+//                      operand's minimum precision to pick the overload.
+//   DstIdx           - index of the destination operand in Inst.
+//   SrcIdx1, SrcIdx2 - indices of the source operands in Inst.
+// When ElementType is F64, the component following a processed one is skipped.
+void DxbcConverter::ConvertBinary(OP::OpCode OpCode,
+                                  const CompType &ElementType,
+                                  D3D10ShaderBinary::CInstruction &Inst,
+                                  const unsigned DstIdx,
+                                  const unsigned SrcIdx1,
+                                  const unsigned SrcIdx2) {
+  DXASSERT_NOMSG(OP::GetOpCodeClass(OpCode) == OP::OpCodeClass::Binary);
+
+  const CMask DstMask = CMask::FromDXBC(Inst.m_Operands[DstIdx].m_WriteMask);
+  const CompType OpType = DXBC::GetCompTypeWithMinPrec(ElementType, Inst.m_Operands[DstIdx].m_MinPrecision);
+  Function *pOpFunc = m_pOP->GetOpFunc(OpCode, OpType.GetLLVMType(m_Ctx));
+
+  OperandValue Lhs, Rhs, Result;
+  LoadOperand(Lhs, Inst, SrcIdx1, DstMask, OpType);
+  LoadOperand(Rhs, Inst, SrcIdx2, DstMask, OpType);
+
+  // A processed component advances by two for doubles, by one otherwise.
+  const BYTE Stride = (ElementType.GetKind() == CompType::Kind::F64) ? 2 : 1;
+
+  for (BYTE Comp = 0; Comp < DXBC::kWidth; ) {
+    if (!DstMask.IsSet(Comp)) {
+      Comp++;
+      continue;
+    }
+
+    Result[Comp] = m_pBuilder->CreateCall(pOpFunc, { m_pOP->GetU32Const((unsigned)OpCode), Lhs[Comp], Rhs[Comp] });
+    Comp += Stride;
+  }
+
+  StoreOperand(Result, Inst, DstIdx, DstMask, OpType);
+}
+
+// Converts a two-source DXBC instruction into per-component LLVM binary
+// operators of kind OpCode.
+//   ElementType      - base component type; combined with the destination
+//                      operand's minimum precision to form the operation type.
+//   DstIdx           - index of the destination operand in Inst.
+//   SrcIdx1, SrcIdx2 - indices of the source operands in Inst.
+// For Shl/AShr/LShr the second operand is masked to its low 5 bits first.
+// When ElementType is F64, the component following a processed one is skipped.
+void DxbcConverter::ConvertBinary(Instruction::BinaryOps OpCode,
+                                  const CompType &ElementType,
+                                  D3D10ShaderBinary::CInstruction &Inst, 
+                                  const unsigned DstIdx,
+                                  const unsigned SrcIdx1,
+                                  const unsigned SrcIdx2) {
+  const CMask DstMask = CMask::FromDXBC(Inst.m_Operands[DstIdx].m_WriteMask);
+  const CompType OpType = DXBC::GetCompTypeWithMinPrec(ElementType, Inst.m_Operands[DstIdx].m_MinPrecision);
+
+  OperandValue Lhs, Rhs, Result;
+  LoadOperand(Lhs, Inst, SrcIdx1, DstMask, OpType);
+  LoadOperand(Rhs, Inst, SrcIdx2, DstMask, OpType);
+
+  // The operator kind does not change per component; classify it once.
+  const bool bIsShift = OpCode == Instruction::Shl ||
+                        OpCode == Instruction::AShr ||
+                        OpCode == Instruction::LShr;
+  const BYTE Stride = (ElementType.GetKind() == CompType::Kind::F64) ? 2 : 1;
+
+  for (BYTE Comp = 0; Comp < DXBC::kWidth; ) {
+    if (!DstMask.IsSet(Comp)) {
+      Comp++;
+      continue;
+    }
+
+    Value *pRhs = Rhs[Comp];
+    if (bIsShift) {
+      // Limit shift amount to 5 bits.
+      pRhs = m_pBuilder->CreateAnd(pRhs, 0x0000001F);
+    }
+
+    Result[Comp] = m_pBuilder->CreateBinOp(OpCode, Lhs[Comp], pRhs);
+    Comp += Stride;
+  }
+
+  StoreOperand(Result, Inst, DstIdx, DstMask, OpType);
+}
+
+// Converts a two-source, two-destination DXBC instruction into per-component
+// calls of the DXIL operation OpCode (class BinaryWithTwoOuts), using the i32
+// overload. The call is emitted for every component written by either
+// destination; result element 0 goes to the first destination and element 1 to
+// the second, each stored under that destination's own write mask.
+// Nothing is emitted when neither destination writes any component.
+void DxbcConverter::ConvertBinaryWithTwoOuts(OP::OpCode OpCode,
+                                             D3D10ShaderBinary::CInstruction &Inst, 
+                                             const unsigned DstIdx1, const unsigned DstIdx2,
+                                             const unsigned SrcIdx1, const unsigned SrcIdx2) {
+  DXASSERT_NOMSG(OP::GetOpCodeClass(OpCode) == OP::OpCodeClass::BinaryWithTwoOuts);
+
+  const CMask UnionMask = CMask::FromDXBC(Inst.m_Operands[DstIdx1].m_WriteMask | Inst.m_Operands[DstIdx2].m_WriteMask);
+  if (UnionMask.ToByte() == 0) {
+    // No-op if both destinations are null
+    DXASSERT_NOMSG(Inst.m_Operands[DstIdx1].m_Type == D3D10_SB_OPERAND_TYPE_NULL &&
+        Inst.m_Operands[DstIdx2].m_Type == D3D10_SB_OPERAND_TYPE_NULL);
+    return;
+  }
+
+  const CMask FirstMask = CMask::FromDXBC(Inst.m_Operands[DstIdx1].m_WriteMask);
+  const CMask SecondMask = CMask::FromDXBC(Inst.m_Operands[DstIdx2].m_WriteMask);
+  const CompType OpType = CompType::getI32();
+  Function *pOpFunc = m_pOP->GetOpFunc(OpCode, OpType.GetLLVMType(m_Ctx));
+
+  OperandValue Lhs, Rhs, FirstOut, SecondOut;
+  LoadOperand(Lhs, Inst, SrcIdx1, UnionMask, OpType);
+  LoadOperand(Rhs, Inst, SrcIdx2, UnionMask, OpType);
+
+  for (BYTE Comp = 0; Comp < DXBC::kWidth; Comp++) {
+    if (UnionMask.IsSet(Comp)) {
+      Value *pPair = m_pBuilder->CreateCall(pOpFunc, { m_pOP->GetU32Const((unsigned)OpCode), Lhs[Comp], Rhs[Comp] });
+      pPair = MarkPrecise(pPair, Comp);
+      FirstOut[Comp] = m_pBuilder->CreateExtractValue(pPair, 0);
+      SecondOut[Comp] = m_pBuilder->CreateExtractValue(pPair, 1);
+    }
+  }
+
+  StoreOperand(FirstOut, Inst, DstIdx1, FirstMask, OpType);
+  StoreOperand(SecondOut, Inst, DstIdx2, SecondMask, OpType);
+}
+
+// Converts a two-source, two-destination DXBC instruction into per-component
+// calls of the DXIL operation OpCode (class BinaryWithCarryOrBorrow), using the
+// i32 overload. The call is emitted for every component written by either
+// destination. Result element 0 goes to the first destination; element 1 is
+// zero-extended to i32 and goes to the second destination.
+// Each destination is stored under its own write mask (the same convention as
+// ConvertBinaryWithTwoOuts), so a destination never receives components that
+// only the other destination selects.
+void DxbcConverter::ConvertBinaryWithCarry(OP::OpCode OpCode,
+                                           D3D10ShaderBinary::CInstruction &Inst, 
+                                           const unsigned DstIdx1, const unsigned DstIdx2,
+                                           const unsigned SrcIdx1, const unsigned SrcIdx2) {
+  DXASSERT_NOMSG(OP::GetOpCodeClass(OpCode) == OP::OpCodeClass::BinaryWithCarryOrBorrow);
+  // Union of both destination masks: components for which the op must be computed.
+  CMask WriteMask = CMask::FromDXBC(Inst.m_Operands[DstIdx1].m_WriteMask | Inst.m_Operands[DstIdx2].m_WriteMask);
+  CMask Dst1Mask = CMask::FromDXBC(Inst.m_Operands[DstIdx1].m_WriteMask);
+  CMask Dst2Mask = CMask::FromDXBC(Inst.m_Operands[DstIdx2].m_WriteMask);
+  CompType OperationType = CompType::getI32();
+  Type *pOperationType = OperationType.GetLLVMType(m_Ctx);
+  Function *pFunc = m_pOP->GetOpFunc(OpCode, pOperationType);
+
+  OperandValue In1, In2, Out1, Out2;
+  LoadOperand(In1, Inst, SrcIdx1, WriteMask, OperationType);
+  LoadOperand(In2, Inst, SrcIdx2, WriteMask, OperationType);
+
+  for (BYTE c = 0; c < DXBC::kWidth; c++) {
+    if (!WriteMask.IsSet(c)) continue;
+
+    Value *pRes = m_pBuilder->CreateCall(pFunc, { m_pOP->GetU32Const((unsigned)OpCode), In1[c], In2[c] });
+    pRes = MarkPrecise(pRes, c);
+    Out1[c] = m_pBuilder->CreateExtractValue(pRes, 0);
+    Out2[c] = m_pBuilder->CreateExtractValue(pRes, 1);
+    // Widen the second result element to a 32-bit integer before storing it.
+    Out2[c] = m_pBuilder->CreateZExt(Out2[c], Type::getInt32Ty(m_Ctx));
+  }
+
+  StoreOperand(Out1, Inst, DstIdx1, Dst1Mask, OperationType);
+  StoreOperand(Out2, Inst, DstIdx2, Dst2Mask, CompType::getI32());
+}
+
+// Converts a three-source DXBC instruction into per-component calls of the DXIL
+// operation OpCode (class Tertiary).
+//   ElementType - base component type; combined with the destination operand's
+//                 minimum precision to pick the overload. If that overload is
+//                 not legal for OpCode and it is i16, the i32 overload is used
+//                 and operands are handled as ElementType instead.
+//   DstIdx      - index of the destination operand in Inst.
+//   SrcIdx1..3  - indices of the source operands in Inst.
+// When ElementType is F64, the component following a processed one is skipped.
+void DxbcConverter::ConvertTertiary(OP::OpCode OpCode,
+                                    const CompType &ElementType,
+                                    D3D10ShaderBinary::CInstruction &Inst, 
+                                    const unsigned DstIdx,
+                                    const unsigned SrcIdx1,
+                                    const unsigned SrcIdx2,
+                                    const unsigned SrcIdx3) {
+  DXASSERT_NOMSG(OP::GetOpCodeClass(OpCode) == OP::OpCodeClass::Tertiary);
+
+  const CMask DstMask = CMask::FromDXBC(Inst.m_Operands[DstIdx].m_WriteMask);
+  CompType OpType = DXBC::GetCompTypeWithMinPrec(ElementType, Inst.m_Operands[DstIdx].m_MinPrecision);
+  Type *pOverloadTy = OpType.GetLLVMType(m_Ctx);
+
+  // Fall back from an illegal i16 overload to i32.
+  if (!m_pOP->IsOverloadLegal(OpCode, pOverloadTy) && pOverloadTy == Type::getInt16Ty(m_Ctx)) {
+    pOverloadTy = Type::getInt32Ty(m_Ctx);
+    OpType = ElementType;
+  }
+
+  Function *pOpFunc = m_pOP->GetOpFunc(OpCode, pOverloadTy);
+
+  OperandValue A, B, C, Result;
+  LoadOperand(A, Inst, SrcIdx1, DstMask, OpType);
+  LoadOperand(B, Inst, SrcIdx2, DstMask, OpType);
+  LoadOperand(C, Inst, SrcIdx3, DstMask, OpType);
+
+  const BYTE Stride = (ElementType.GetKind() == CompType::Kind::F64) ? 2 : 1;
+
+  for (BYTE Comp = 0; Comp < DXBC::kWidth; ) {
+    if (!DstMask.IsSet(Comp)) {
+      Comp++;
+      continue;
+    }
+
+    Result[Comp] = m_pBuilder->CreateCall(pOpFunc, { m_pOP->GetU32Const((unsigned)OpCode), A[Comp], B[Comp], C[Comp] });
+    Comp += Stride;
+  }
+
+  StoreOperand(Result, Inst, DstIdx, DstMask, OpType);
+}
+
+// Converts a four-source DXBC instruction into per-component calls of the DXIL
+// operation OpCode (class Quaternary).
+//   ElementType - base component type; combined with the destination operand's
+//                 minimum precision to pick the overload.
+//   DstIdx      - index of the destination operand in Inst.
+//   SrcIdx1..4  - indices of the source operands in Inst.
+void DxbcConverter::ConvertQuaternary(OP::OpCode OpCode,
+                                      const CompType &ElementType,
+                                      D3D10ShaderBinary::CInstruction &Inst, 
+                                      const unsigned DstIdx,
+                                      const unsigned SrcIdx1, const unsigned SrcIdx2,
+                                      const unsigned SrcIdx3, const unsigned SrcIdx4) {
+  DXASSERT_NOMSG(OP::GetOpCodeClass(OpCode) == OP::OpCodeClass::Quaternary);
+
+  const CMask DstMask = CMask::FromDXBC(Inst.m_Operands[DstIdx].m_WriteMask);
+  const CompType OpType = DXBC::GetCompTypeWithMinPrec(ElementType, Inst.m_Operands[DstIdx].m_MinPrecision);
+  Function *pOpFunc = m_pOP->GetOpFunc(OpCode, OpType.GetLLVMType(m_Ctx));
+
+  OperandValue A, B, C, D, Result;
+  LoadOperand(A, Inst, SrcIdx1, DstMask, OpType);
+  LoadOperand(B, Inst, SrcIdx2, DstMask, OpType);
+  LoadOperand(C, Inst, SrcIdx3, DstMask, OpType);
+  LoadOperand(D, Inst, SrcIdx4, DstMask, OpType);
+
+  // One call per component selected by the destination write mask.
+  for (BYTE Comp = 0; Comp < DXBC::kWidth; Comp++) {
+    if (DstMask.IsSet(Comp)) {
+      Result[Comp] = m_pBuilder->CreateCall(pOpFunc, { m_pOP->GetU32Const((unsigned)OpCode), A[Comp], B[Comp], C[Comp], D[Comp] });
+    }
+  }
+
+  StoreOperand(Result, Inst, DstIdx, DstMask, OpType);
+}
+
+// Converts a DXBC comparison into per-component LLVM compare instructions and
+// stores the i1 results to the destination.
+//   Predicate        - compare predicate. Accepted: FCMP_OEQ/UNE/OLT/OGE, plus
+//                      ICMP_EQ/NE/SLT/SGE/ULT/UGE when ElementType is not F64.
+//                      Anything else asserts and produces no value.
+//   ElementType      - base component type; combined with the higher of the two
+//                      source operands' minimum precisions.
+//   DstIdx           - index of the destination operand in Inst.
+//   SrcIdx1, SrcIdx2 - indices of the source operands in Inst.
+// For F64, sources are loaded with the double-operation mask derived from the
+// write mask, and the k-th written destination component compares source
+// component 2*k.
+void DxbcConverter::ConvertComparison(CmpInst::Predicate Predicate,
+                                      const CompType &ElementType,
+                                      D3D10ShaderBinary::CInstruction &Inst, 
+                                      const unsigned DstIdx,
+                                      const unsigned SrcIdx1,
+                                      const unsigned SrcIdx2) {
+  const CMask DstMask = CMask::FromDXBC(Inst.m_Operands[DstIdx].m_WriteMask);
+  const CompType OpType = DXBC::GetCompTypeWithMinPrec(ElementType, 
+                                                       GetHigherPrecision(Inst.m_Operands[SrcIdx1].m_MinPrecision, 
+                                                                          Inst.m_Operands[SrcIdx2].m_MinPrecision));
+
+  if (ElementType.GetKind() == CompType::Kind::F64) {
+    // Double-precision comparison.
+    const CMask SrcMask = CMask::GetMaskForDoubleOperation(DstMask);
+
+    OperandValue Lhs, Rhs, Result;
+    LoadOperand(Lhs, Inst, SrcIdx1, SrcMask, OpType);
+    LoadOperand(Rhs, Inst, SrcIdx2, SrcMask, OpType);
+
+    BYTE SrcComp = 0;
+    for (BYTE Comp = 0; Comp < DXBC::kWidth; Comp++) {
+      if (DstMask.IsSet(Comp)) {
+        switch (Predicate) {
+        case CmpInst::FCMP_OEQ:
+        case CmpInst::FCMP_UNE:
+        case CmpInst::FCMP_OLT:
+        case CmpInst::FCMP_OGE:
+          Result[Comp] = m_pBuilder->CreateFCmp(Predicate, Lhs[SrcComp], Rhs[SrcComp]);
+          break;
+
+        default:
+          DXASSERT_NOMSG(false);
+        }
+
+        SrcComp += 2;
+      }
+    }
+
+    StoreOperand(Result, Inst, DstIdx, DstMask, CompType::getI1());
+    return;
+  }
+
+  // Single-width comparison: sources use the destination write mask directly.
+  OperandValue Lhs, Rhs, Result;
+  LoadOperand(Lhs, Inst, SrcIdx1, DstMask, OpType);
+  LoadOperand(Rhs, Inst, SrcIdx2, DstMask, OpType);
+
+  for (BYTE Comp = 0; Comp < DXBC::kWidth; Comp++) {
+    if (DstMask.IsSet(Comp)) {
+      switch (Predicate) {
+      case CmpInst::FCMP_OEQ:
+      case CmpInst::FCMP_UNE:
+      case CmpInst::FCMP_OLT:
+      case CmpInst::FCMP_OGE:
+        Result[Comp] = m_pBuilder->CreateFCmp(Predicate, Lhs[Comp], Rhs[Comp]);
+        break;
+
+      case CmpInst::ICMP_EQ:
+      case CmpInst::ICMP_NE:
+      case CmpInst::ICMP_SLT:
+      case CmpInst::ICMP_SGE:
+      case CmpInst::ICMP_ULT:
+      case CmpInst::ICMP_UGE:
+        Result[Comp] = m_pBuilder->CreateICmp(Predicate, Lhs[Comp], Rhs[Comp]);
+        break;
+
+      default:
+        DXASSERT_NOMSG(false);
+      }
+    }
+  }
+
+  StoreOperand(Result, Inst, DstIdx, DstMask, CompType::getI1());
+}
+
+// Converts a DXBC dot-product instruction into a single call of the DXIL
+// operation OpCode and writes the result to every component selected by the
+// destination write mask.
+//   NumComps - number of components taken from each source (operands 1 and 2).
+//   LoadMask - mask used to load the two source operands.
+// The call arguments are: the opcode, then the first NumComps components of
+// operand 1, then the first NumComps components of operand 2.
+void DxbcConverter::ConvertDotProduct(OP::OpCode OpCode, 
+                                      const BYTE NumComps,
+                                      const CMask &LoadMask,
+                                      D3D10ShaderBinary::CInstruction &Inst) {
+  const CMask DstMask = CMask::FromDXBC(Inst.m_Operands[0].m_WriteMask);
+  const CompType OpType = DXBC::GetCompTypeWithMinPrec(CompType::getF32(), Inst.m_Operands[0].m_MinPrecision);
+  Function *pOpFunc = m_pOP->GetOpFunc(OpCode, OpType.GetLLVMType(m_Ctx));
+
+  OperandValue Lhs, Rhs, Result;
+  LoadOperand(Lhs, Inst, 1, LoadMask, OpType);
+  LoadOperand(Rhs, Inst, 2, LoadMask, OpType);
+
+  // Build the argument list in order instead of indexing into a presized vector.
+  vector<Value*> CallArgs;
+  CallArgs.reserve(1 + NumComps*2);
+  CallArgs.push_back(m_pOP->GetU32Const((unsigned)OpCode));
+  for (BYTE Comp = 0; Comp < NumComps; Comp++) {
+    CallArgs.push_back(Lhs[Comp]);
+  }
+  for (BYTE Comp = 0; Comp < NumComps; Comp++) {
+    CallArgs.push_back(Rhs[Comp]);
+  }
+  Value *pDot = m_pBuilder->CreateCall(pOpFunc, CallArgs);
+
+  // Replicate the scalar result into every written component.
+  for (BYTE Comp = 0; Comp < DXBC::kWidth; Comp++) {
+    if (DstMask.IsSet(Comp)) {
+      Result[Comp] = pDot;
+    }
+  }
+
+  StoreOperand(Result, Inst, 0, DstMask, OpType);
+}
+
+// Emits a numeric conversion of pSrc to pDstType and guarantees that the
+// returned value is neither nullptr nor undef.
+//   SrcKind / DstKind select the cast instruction:
+//     F32 -> I32 (fptosi), U32 (fptoui), F16 (fptrunc), F64 (fpext)
+//     I32 -> F32, F64 (sitofp)
+//     U32 -> F32, F64 (uitofp)
+//     F16 -> F32, F64 (fpext)
+//   Any other combination asserts.
+// If no cast was produced, or the builder returned an undef value, a substitute
+// constant is returned instead (see the rescue block below).
+static Value *SafeConvertCast(IRBuilder<> &Builder, Value *pSrc, Type *pDstType, CompType::Kind SrcKind, CompType::Kind DstKind) {
+  // Prevent undef or nullptr values from getting through
+  Value *pResult = nullptr;
+
+  switch (SrcKind) {
+  case CompType::Kind::F32:
+    switch (DstKind) {
+    case CompType::Kind::I32: pResult = Builder.CreateFPToSI(pSrc, pDstType); break;
+    case CompType::Kind::U32: pResult = Builder.CreateFPToUI(pSrc, pDstType); break;
+    case CompType::Kind::F16: pResult = Builder.CreateFPTrunc(pSrc, pDstType); break;
+    case CompType::Kind::F64: pResult = Builder.CreateFPExt(pSrc, pDstType); break;
+    }
+    break;
+
+  case CompType::Kind::I32:
+    switch (DstKind) {
+    case CompType::Kind::F32:
+    case CompType::Kind::F64: pResult = Builder.CreateSIToFP(pSrc, pDstType); break;
+    }
+    break;
+
+  case CompType::Kind::U32:
+    switch (DstKind) {
+    case CompType::Kind::F32:
+    case CompType::Kind::F64: pResult = Builder.CreateUIToFP(pSrc, pDstType); break;
+    }
+    break;
+
+  case CompType::Kind::F16:
+    switch (DstKind) {
+    case CompType::Kind::F32:
+    case CompType::Kind::F64: pResult = Builder.CreateFPExt(pSrc, pDstType); break;
+    }
+    break;
+  }
+
+  // Note: Conversion from F64 uses ConvertFromDouble instead.
+
+  DXASSERT(pResult != nullptr, "otherwise the caller passed incorrect type combination");
+
+  // nullptr result indicates an error, but undef result may also occur with out-of-range constants
+  // Rescue null or undef result by converting to max/min(u)int/+-infinity, or 0xfefefefe/+-nan, to prevent invalid IR.
+  if (!pResult || isa<UndefValue>(pResult)) {
+    bool bSrcNegative = false;
+    // "Invalid" means there was no cast at all, or the source is not a constant
+    // whose sign can be inspected.
+    bool bInvalid = !pResult;
+    // Get src sign:
+    if (ConstantFP *pConstFP = dyn_cast<ConstantFP>(pSrc)) {
+      bSrcNegative = pConstFP->getValueAPF().isNegative();
+    }
+    else if (ConstantInt *pConstInt = dyn_cast<ConstantInt>(pSrc)) {
+      bSrcNegative = pConstInt->getValue().isNegative();
+    } else {
+      DXASSERT(false, "unhandled case for SafeConvertCast failure.");
+      bInvalid = true;
+    }
+
+    if (pDstType->isIntegerTy()) {
+      DXASSERT(pDstType->getScalarSizeInBits() == 32, "otherwise, int dest type is not expected size");
+      // Marker value used when the conversion is invalid; otherwise saturate
+      // towards the sign of the source.
+      APInt API(32, 0xFEFEFEFE);
+      if (!bInvalid) {
+        switch (DstKind) {
+        case CompType::Kind::I32: API = bSrcNegative ? APInt::getSignedMinValue(32) : APInt::getSignedMaxValue(32); break;
+        case CompType::Kind::U32: API = bSrcNegative ? APInt::getNullValue(32) : APInt::getMaxValue(32); break;
+        }
+      }
+      pResult = ConstantInt::get(pDstType->getContext(), API);
+    } else {
+      // Floating-point destination: NaN when invalid, otherwise infinity, both
+      // carrying the sign of the source.
+      if (bInvalid) {
+        pResult = ConstantFP::getNaN(pDstType, bSrcNegative);
+      } else {
+        pResult = ConstantFP::getInfinity(pDstType, bSrcNegative);
+      }
+    }
+  }
+
+  return pResult;
+}
+
+// Converts a DXBC type-conversion instruction: every component selected by the
+// destination write mask is loaded as SrcElementType, converted with
+// SafeConvertCast, and stored as DstElementType.
+//   DstIdx - index of the destination operand in Inst.
+//   SrcIdx - index of the source operand in Inst.
+void DxbcConverter::ConvertCast(const CompType &SrcElementType,
+                                const CompType &DstElementType,
+                                D3D10ShaderBinary::CInstruction &Inst, 
+                                const unsigned DstIdx,
+                                const unsigned SrcIdx) {
+  const CMask DstMask = CMask::FromDXBC(Inst.m_Operands[DstIdx].m_WriteMask);
+  Type *pTargetTy = DstElementType.GetLLVMType(m_Ctx);
+
+  OperandValue Src, Result;
+  LoadOperand(Src, Inst, SrcIdx, DstMask, SrcElementType);
+
+  for (BYTE Comp = 0; Comp < DXBC::kWidth; Comp++) {
+    if (DstMask.IsSet(Comp)) {
+      Result[Comp] = SafeConvertCast(*m_pBuilder, Src[Comp], pTargetTy, SrcElementType.GetKind(), DstElementType.GetKind());
+    }
+  }
+
+  StoreOperand(Result, Inst, DstIdx, DstMask, DstElementType);
+}
+
+// Converts a DXBC "to double" instruction (operand 0 = destination, operand 1 =
+// source). Source components are loaded as SrcElementType, converted to F64 with
+// SafeConvertCast, and placed at every second destination component.
+// Accepted destination write masks:
+//   0x0 - nothing to do
+//   0x3 - source x -> destination component 0
+//   0xC - source x -> destination component 2
+//   0xF - source x, y -> destination components 0 and 2
+// Any other mask asserts and then fails the conversion with E_FAIL, so the code
+// never proceeds with an unset source mask or output component.
+void DxbcConverter::ConvertToDouble(const CompType &SrcElementType, D3D10ShaderBinary::CInstruction &Inst) {
+  const unsigned DstIdx = 0;
+  const unsigned SrcIdx = 1;
+  CMask WriteMask = CMask::FromDXBC(Inst.m_Operands[DstIdx].m_WriteMask);
+  CompType DstElementType = CompType::getF64();
+  Type *pDstType = DstElementType.GetLLVMType(m_Ctx);
+  CMask Mask;
+  BYTE OutputComp = 0;
+  switch (WriteMask.ToByte()) {
+  case 0x0:   return;
+  case 0x3:   Mask = CMask(1,0,0,0); OutputComp = 0; break;
+  case 0xC:   Mask = CMask(1,0,0,0); OutputComp = 2; break;
+  case 0xF:   Mask = CMask(1,1,0,0); OutputComp = 0; break;
+  default:
+    DXASSERT_DXBC(false);
+    // Still reject the instruction when the assertion is compiled out.
+    IFTBOOL(false, E_FAIL);
+  }
+
+  OperandValue In, Out;
+  LoadOperand(In, Inst, SrcIdx, Mask, SrcElementType);
+
+  for (BYTE c = 0; c < DXBC::kWidth; c++) {
+    if (!Mask.IsSet(c)) continue;
+    Out[OutputComp] = SafeConvertCast(*m_pBuilder, In[c], pDstType, SrcElementType.GetKind(), DstElementType.GetKind());
+    // Each converted value starts two components after the previous one.
+    OutputComp += 2;
+  }
+
+  StoreOperand(Out, Inst, DstIdx, WriteMask, DstElementType);
+}
+
+// Converts double-precision source values (operand 1) to DstElementType and
+// stores one result per written component of the destination operand
+// (operand 0). Supported destination kinds are I32, U32 and F32, mapped to
+// the LegacyDoubleToSInt32 / LegacyDoubleToUInt32 / LegacyDoubleToFloat
+// operations; any other kind trips DXASSERT_NOMSG.
+void DxbcConverter::ConvertFromDouble(const CompType &DstElementType, D3D10ShaderBinary::CInstruction &Inst) {
+  const unsigned DstIdx = 0;
+  const unsigned SrcIdx = 1;
+  CMask WriteMask = CMask::FromDXBC(Inst.m_Operands[DstIdx].m_WriteMask);
+  CompType SrcElementType = CompType::getF64();
+  CMask Mask = CMask::GetMaskForDoubleOperation(WriteMask);
+
+  OperandValue In, Out;
+  LoadOperand(In, Inst, SrcIdx, Mask, SrcElementType);
+
+  // The conversion opcode depends only on the destination type, so it is
+  // selected once rather than once per written component.
+  OP::OpCode OpCode = OP::OpCode(0);
+  switch (DstElementType.GetKind()) {
+  case CompType::Kind::I32: OpCode = OP::OpCode::LegacyDoubleToSInt32; break;
+  case CompType::Kind::U32: OpCode = OP::OpCode::LegacyDoubleToUInt32; break;
+  case CompType::Kind::F32: OpCode = OP::OpCode::LegacyDoubleToFloat;  break;
+  default: DXASSERT_NOMSG(false);
+  }
+
+  BYTE OperationComp = 0;
+  for (BYTE c = 0; c < DXBC::kWidth; c++) {
+    if (!WriteMask.IsSet(c)) continue;
+
+    // Create call.
+    Function *F = m_pOP->GetOpFunc(OpCode, Type::getVoidTy(m_Ctx));
+    Value *Args[2];
+    Args[0] = m_pOP->GetU32Const((unsigned)OpCode);   // OpCode
+    Args[1] = In[OperationComp];                      // Double value
+
+    Out[c] = MarkPrecise(m_pBuilder->CreateCall(F, Args));
+
+    // The next source double is read two components further on.
+    OperationComp += 2;
+  }
+
+  StoreOperand(Out, Inst, DstIdx, WriteMask, DstElementType);
+}
+
+// Loads the operands shared by the sample-style instructions and writes them
+// into the caller-provided argument array:
+//   pArgs[1]    SRV handle
+//   pArgs[2]    sampler handle
+//   pArgs[3..6] coordinates 0..3 (m_pUnusedF32 for coordinates the resource
+//               kind does not use)
+//   pArgs[7..9] immediate texel offsets 0..2 (m_pUnusedI32 where unused);
+//               written only when bSetOffsets is true
+// pArgs[0] is not touched here. The array must therefore have room for at
+// least 7 entries, or 10 when bSetOffsets is true.
+void DxbcConverter::LoadCommonSampleInputs(D3D10ShaderBinary::CInstruction &Inst, Value *pArgs[], bool bSetOffsets) {
+  bool bHasFeedback = DXBC::HasFeedback(Inst.OpCode());
+  const unsigned uOpStatus = 1;
+  // With feedback, the status operand sits between the output and the
+  // coordinates, which shifts the coordinate operand by one.
+  const unsigned uOpCoord = uOpStatus + (bHasFeedback ? 1 : 0);
+  const unsigned uOpSRV = DXBC::GetResourceSlot(Inst.OpCode());
+  const unsigned uOpSampler = uOpSRV + 1;
+  DXASSERT_DXBC(Inst.m_Operands[uOpSRV].m_Type == D3D10_SB_OPERAND_TYPE_RESOURCE);
+  DXASSERT_DXBC(Inst.m_Operands[uOpSampler].m_Type == D3D10_SB_OPERAND_TYPE_SAMPLER);
+
+  OperandValue InSRV, InSampler, InCoord;
+  // Resource.
+  const DxilResource &R = LoadSRVOperand(InSRV, Inst, uOpSRV, CMask::MakeXMask(), CompType::getInvalid());
+  // Coordinates: the number of components depends on the resource kind.
+  CMask CoordMask = CMask::MakeFirstNCompMask(DXBC::GetNumResCoords(R.GetKind()));
+  LoadOperand(InCoord, Inst, uOpCoord, CoordMask, CompType::getF32());
+  // Sampler.
+  LoadOperand(InSampler, Inst, uOpSampler, CMask::MakeXMask(), CompType::getInvalid());
+
+  // Create Sample call's common arguments.
+  pArgs[1] = InSRV[0];                                        // SRV handle
+  pArgs[2] = InSampler[0];                                    // Sampler handle
+  pArgs[3] = CoordMask.IsSet(0) ? InCoord[0] : m_pUnusedF32;  // Coordinate 0
+  pArgs[4] = CoordMask.IsSet(1) ? InCoord[1] : m_pUnusedF32;  // Coordinate 1
+  pArgs[5] = CoordMask.IsSet(2) ? InCoord[2] : m_pUnusedF32;  // Coordinate 2
+  pArgs[6] = CoordMask.IsSet(3) ? InCoord[3] : m_pUnusedF32;  // Coordinate 3
+
+  // Offsets: taken from the instruction's immediate texel offsets.
+  if (bSetOffsets) {
+    CMask ResOffsetMask = CMask::MakeFirstNCompMask(DXBC::GetNumResOffsets(R.GetKind()));
+    pArgs[7] = ResOffsetMask.IsSet(0) ? m_pOP->GetU32Const(Inst.m_TexelOffset[0]) : m_pUnusedI32; // Offset 0
+    pArgs[8] = ResOffsetMask.IsSet(1) ? m_pOP->GetU32Const(Inst.m_TexelOffset[1]) : m_pUnusedI32; // Offset 1
+    pArgs[9] = ResOffsetMask.IsSet(2) ? m_pOP->GetU32Const(Inst.m_TexelOffset[2]) : m_pUnusedI32; // Offset 2
+  }
+}
+
+// Stores the result of a resource-returning operation. The value fields of
+// pResRet are written to operand 0 and, for opcodes with feedback, field 4 of
+// pResRet is written to operand 1 as the status.
+void DxbcConverter::StoreResRetOutputAndStatus(D3D10ShaderBinary::CInstruction &Inst,
+                                               Value *pResRet,
+                                               CompType DstType) {
+  const unsigned uOpOutput = 0;
+  const unsigned uOpStatus = 1;
+  // Index of the status field inside pResRet.
+  const unsigned uStatusField = 4;
+  const bool bHasFeedback = DXBC::HasFeedback(Inst.OpCode());
+  const unsigned uOpRes = GetResourceSlot(Inst);
+
+  MarkPrecise(pResRet);
+
+  // Value components.
+  CMask ValueMask = CMask::FromDXBC(Inst.m_Operands[uOpOutput].m_WriteMask);
+  if (!ValueMask.IsZero()) {
+    OperandValue Values;
+    for (BYTE Dst = 0; Dst != DXBC::kWidth; ++Dst) {
+      if (ValueMask.IsSet(Dst)) {
+        // The resource operand's swizzle picks the returned field that feeds
+        // this destination component.
+        BYTE Field = Inst.m_Operands[uOpRes].m_Swizzle[Dst];
+        Values[Dst] = m_pBuilder->CreateExtractValue(pResRet, Field);
+      }
+    }
+    StoreOperand(Values, Inst, uOpOutput, ValueMask, DstType);
+  }
+
+  // Status is only stored for opcodes with feedback.
+  if (!bHasFeedback)
+    return;
+
+  CMask StatusMask = CMask::FromDXBC(Inst.m_Operands[uOpStatus].m_WriteMask);
+  if (StatusMask.IsZero())
+    return;
+
+  OperandValue Status;
+  for (BYTE Dst = 0; Dst != DXBC::kWidth; ++Dst) {
+    if (StatusMask.IsSet(Dst)) {
+      Status[Dst] = m_pBuilder->CreateExtractValue(pResRet, uStatusField);
+    }
+  }
+  StoreOperand(Status, Inst, uOpStatus, StatusMask, CompType::getU32());
+}
+
+// Stores the fields of a get-dimensions result (pGetDimRet) to operand 0.
+// The instruction's resinfo return type selects the stored form:
+//   UINT     - the extracted integer values as they are,
+//   FLOAT    - the values converted with an unsigned-int-to-float cast,
+//   RCPFLOAT - as FLOAT, and additionally 1.0f / value for the fields whose
+//              index is below the resource kind's number of dimensions.
+void DxbcConverter::StoreGetDimensionsOutput(D3D10ShaderBinary::CInstruction &Inst, Value *pGetDimRet) {
+  const unsigned uOpOutput = 0;
+  const unsigned uOpRes = DXBC::GetResourceSlot(Inst.OpCode());
+
+  CMask DstMask = CMask::FromDXBC(Inst.m_Operands[uOpOutput].m_WriteMask);
+  if (DstMask.IsZero())
+    return;
+
+  // Resource record: an SRV when the operand is a resource, otherwise a UAV
+  // looked up through the UAV range map.
+  const DxilResource *pRes = nullptr;
+  if (Inst.m_Operands[uOpRes].m_Type != D3D10_SB_OPERAND_TYPE_RESOURCE) {
+    unsigned UavRangeID = Inst.m_Operands[uOpRes].m_Index[0].m_RegIndex;
+    pRes = &m_pPR->GetUAV(m_UAVRangeMap[UavRangeID]);
+  } else {
+    pRes = &GetSRVFromOperand(Inst, uOpRes);
+  }
+
+  // Return type (computed here but not consumed below).
+  CompType RetType = DXBC::GetCompTypeWithMinPrec(CompType::getI32(), Inst.m_Operands[uOpOutput].m_MinPrecision);
+
+  // Type of the stored values and whether a reciprocal is requested.
+  bool bReciprocal = false;
+  CompType StoredType = CompType::getI32();
+  switch (Inst.m_ResInfoReturnType) {
+  case D3D10_SB_RESINFO_INSTRUCTION_RETURN_UINT:
+    StoredType = CompType::getI32();
+    break;
+  case D3D10_SB_RESINFO_INSTRUCTION_RETURN_FLOAT:
+    StoredType = CompType::getF32();
+    break;
+  case D3D10_SB_RESINFO_INSTRUCTION_RETURN_RCPFLOAT:
+    StoredType = CompType::getF32();
+    bReciprocal = true;
+    break;
+  default:
+    DXASSERT_DXBC(false);
+  }
+
+  OperandValue Result;
+  for (BYTE Dst = 0; Dst != DXBC::kWidth; ++Dst) {
+    if (!DstMask.IsSet(Dst))
+      continue;
+
+    // The resource operand's swizzle selects the returned field.
+    BYTE Field = Inst.m_Operands[uOpRes].m_Swizzle[Dst];
+    Value *pField = m_pBuilder->CreateExtractValue(pGetDimRet, Field);
+
+    if (StoredType.IsFloatTy()) {
+      pField = m_pBuilder->CreateCast(Instruction::CastOps::UIToFP, pField, Type::getFloatTy(m_Ctx));
+    }
+    // Only fields that are dimensions of this resource kind are inverted.
+    if (bReciprocal && Field < DxilResource::GetNumDimensions(pRes->GetKind())) {
+      pField = m_pBuilder->CreateBinOp(Instruction::BinaryOps::FDiv, m_pOP->GetFloatConst(1.0f), pField);
+    }
+
+    Result[Dst] = pField;
+  }
+
+  StoreOperand(Result, Inst, uOpOutput, DstMask, StoredType);
+}
+
+// Stores a sample-position result to operand 0. pSamplePosVal provides
+// fields 0 and 1; a destination component whose resource swizzle selects
+// field 2 or 3 receives the float constant 0 instead.
+void DxbcConverter::StoreSamplePosOutput(D3D10ShaderBinary::CInstruction &Inst, Value *pSamplePosVal) {
+  const unsigned uOpOutput = 0;
+  const unsigned uOpRes = DXBC::GetResourceSlot(Inst.OpCode());
+  CompType StoredType = DXBC::GetCompTypeWithMinPrec(CompType::getF32(), Inst.m_Operands[uOpOutput].m_MinPrecision);
+
+  CMask DstMask = CMask::FromDXBC(Inst.m_Operands[uOpOutput].m_WriteMask);
+  if (DstMask.IsZero())
+    return;
+
+  OperandValue Result;
+  for (BYTE Dst = 0; Dst != DXBC::kWidth; ++Dst) {
+    if (!DstMask.IsSet(Dst))
+      continue;
+
+    BYTE Field = Inst.m_Operands[uOpRes].m_Swizzle[Dst];
+    if (Field >= 2) {
+      Result[Dst] = m_pOP->GetFloatConst(0);
+    } else {
+      Result[Dst] = m_pBuilder->CreateExtractValue(pSamplePosVal, Field);
+    }
+  }
+  StoreOperand(Result, Inst, uOpOutput, DstMask, StoredType);
+}
+
+// Writes the single value pValue to every component enabled in the write
+// mask of operand 0. Nothing is stored when the write mask is empty.
+void DxbcConverter::StoreBroadcastOutput(D3D10ShaderBinary::CInstruction &Inst, Value *pValue, CompType DstType) {
+  const unsigned uOpOutput = 0;
+  CMask DstMask = CMask::FromDXBC(Inst.m_Operands[uOpOutput].m_WriteMask);
+  if (DstMask.IsZero())
+    return;
+
+  OperandValue Replicated;
+  for (BYTE Comp = 0; Comp != DXBC::kWidth; ++Comp) {
+    if (DstMask.IsSet(Comp)) {
+      Replicated[Comp] = pValue;
+    }
+  }
+  StoreOperand(Replicated, Inst, uOpOutput, DstMask, DstType);
+}
+
+// Loads, as an i32 value, the single component named by operand uCoordIdx
+// (its m_ComponentName) and returns it.
+Value *DxbcConverter::GetCoordValue(D3D10ShaderBinary::CInstruction &Inst, const unsigned uCoordIdx) {
+  BYTE SelectedComp = Inst.m_Operands[uCoordIdx].m_ComponentName;
+  CMask SelectedMask = CMask::MakeCompMask(SelectedComp);
+
+  OperandValue Loaded;
+  LoadOperand(Loaded, Inst, uCoordIdx, SelectedMask, CompType::getI32());
+
+  return Loaded[SelectedComp];
+}
+
+// Builds the byte offset  element * Stride + structByteOffset, where
+// "element" is the selected component of operand Idx1 and "structByteOffset"
+// is the selected component of operand Idx2 (both loaded as i32).
+// The multiplication is emitted only when Stride is greater than 1.
+Value *DxbcConverter::GetByteOffset(D3D10ShaderBinary::CInstruction &Inst, const unsigned Idx1,
+                                    const unsigned Idx2, const unsigned Stride) {
+  // Element offset (operand Idx1).
+  OperandValue ElementVal;
+  BYTE ElementComp = Inst.m_Operands[Idx1].m_ComponentName;
+  CMask ElementMask = CMask::MakeCompMask(ElementComp);
+  LoadOperand(ElementVal, Inst, Idx1, ElementMask, CompType::getI32());
+
+  // Byte offset into the structure (operand Idx2).
+  OperandValue StructVal;
+  BYTE StructComp = Inst.m_Operands[Idx2].m_ComponentName;
+  CMask StructMask = CMask::MakeCompMask(StructComp);
+  LoadOperand(StructVal, Inst, Idx2, StructMask, CompType::getI32());
+
+  // Scale the element offset by the stride when that changes the value.
+  Value *pElement = ElementVal[ElementComp];
+  Value *pWithinStruct = StructVal[StructComp];
+  Value *pScaled = (Stride > 1)
+                       ? m_pBuilder->CreateMul(pElement, m_pOP->GetU32Const(Stride))
+                       : pElement;
+
+  return m_pBuilder->CreateAdd(pScaled, pWithinStruct);
+}
+
+// Loads values from the thread-group-shared-memory variable referenced by
+// operand uOpTGSM, starting at pByteOffset, and stores them to operand
+// uOpOutput. For each written destination component the TGSM operand's
+// swizzle selects the source component, which is addressed at
+// pByteOffset + component * kRegCompAlignment. The loaded value has type
+// SrcType and is cast to the destination's (min-precision aware) type.
+void DxbcConverter::ConvertLoadTGSM(D3D10ShaderBinary::CInstruction &Inst, const unsigned uOpTGSM,
+                                    const unsigned uOpOutput, CompType SrcType, Value *pByteOffset) {
+  DXASSERT_DXBC(Inst.m_Operands[uOpTGSM].m_Type == D3D11_SB_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY);
+  const TGSMEntry &Entry = m_TGSMMap[Inst.m_Operands[uOpTGSM].m_Index[0].m_RegIndex];
+
+  CMask DstMask = CMask::FromDXBC(Inst.m_Operands[uOpOutput].m_WriteMask);
+  if (DstMask.IsZero())
+    return;
+
+  OperandValue Result;
+  CompType StoredType = DXBC::GetCompTypeFromMinPrec(Inst.m_Operands[uOpOutput].m_MinPrecision, CompType::getF32());
+  Type *pElemPtrTy = SrcType.GetLLVMPtrType(m_Ctx, DXIL::kTGSMAddrSpace);
+
+  for (BYTE Dst = 0; Dst != DXBC::kWidth; ++Dst) {
+    if (!DstMask.IsSet(Dst))
+      continue;
+
+    // Source component selected by the swizzle.
+    BYTE SrcComp = Inst.m_Operands[uOpTGSM].m_Swizzle[Dst];
+
+    // Component 0 sits at the byte offset itself; the others are displaced.
+    Value *pAddr = (SrcComp > 0)
+                       ? m_pBuilder->CreateAdd(pByteOffset, m_pOP->GetU32Const(SrcComp * kRegCompAlignment))
+                       : pByteOffset;
+
+    // Address the element inside the TGSM variable.
+    Value *pIndices[2] = { m_pOP->GetU32Const(0), pAddr };
+    Value *pRawPtr = m_pBuilder->CreateGEP(Entry.pVar, pIndices);
+
+    // Reinterpret the pointer as SrcType and load.
+    Value *pTypedPtr = m_pBuilder->CreatePointerCast(pRawPtr, pElemPtrTy);
+    LoadInst *pLoaded = m_pBuilder->CreateLoad(pTypedPtr);
+    pLoaded->setAlignment(kRegCompAlignment);
+
+    Result[Dst] = CastDxbcValue(pLoaded, SrcType, StoredType);
+  }
+
+  StoreOperand(Result, Inst, uOpOutput, DstMask, StoredType);
+}
+
+// Stores the components of operand uOpValue into the thread-group-shared-
+// memory variable referenced by operand uOpTGSM, starting at pByteOffset.
+// The TGSM operand's write mask selects the components; component c is
+// written at pByteOffset + c * kRegCompAlignment after being cast from the
+// value's (min-precision aware) type to BaseValueType.
+void DxbcConverter::ConvertStoreTGSM(D3D10ShaderBinary::CInstruction &Inst, const unsigned uOpTGSM,
+                                     const unsigned uOpValue, CompType BaseValueType, Value *pByteOffset) {
+  DXASSERT_DXBC(Inst.m_Operands[uOpTGSM].m_Type == D3D11_SB_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY);
+  const TGSMEntry &Entry = m_TGSMMap[Inst.m_Operands[uOpTGSM].m_Index[0].m_RegIndex];
+
+  // Type in which the value operand is loaded.
+  CompType LoadedType = DXBC::GetCompTypeFromMinPrec(Inst.m_Operands[uOpValue].m_MinPrecision, BaseValueType);
+
+  // Components to store.
+  CMask StoreMask = CMask::FromDXBC(Inst.m_Operands[uOpTGSM].m_WriteMask);
+  if (StoreMask.IsZero())
+    return;
+
+  OperandValue Source;
+  LoadOperand(Source, Inst, uOpValue, StoreMask, LoadedType);
+
+  CompType StoredType = BaseValueType;
+  Type *pElemPtrTy = StoredType.GetLLVMPtrType(m_Ctx, DXIL::kTGSMAddrSpace);
+  for (BYTE Comp = 0; Comp != DXBC::kWidth; ++Comp) {
+    if (!StoreMask.IsSet(Comp))
+      continue;
+
+    // Component 0 sits at the byte offset itself; the others are displaced.
+    Value *pAddr = (Comp > 0)
+                       ? m_pBuilder->CreateAdd(pByteOffset, m_pOP->GetU32Const(Comp * kRegCompAlignment))
+                       : pByteOffset;
+
+    // Bring the value to the stored type.
+    Value *pConverted = CastDxbcValue(Source[Comp], LoadedType, StoredType);
+
+    // Address the element inside the TGSM variable.
+    Value *pIndices[2] = { m_pOP->GetU32Const(0), pAddr };
+    Value *pRawPtr = m_pBuilder->CreateGEP(Entry.pVar, pIndices);
+
+    // Reinterpret the pointer as the stored type and emit the store.
+    Value *pTypedPtr = m_pBuilder->CreatePointerCast(pRawPtr, pElemPtrTy);
+    StoreInst *pStored = m_pBuilder->CreateStore(pConverted, pTypedPtr);
+    pStored->setAlignment(kRegCompAlignment);
+    (void)MarkPrecise(pStored, Comp);
+  }
+}
+
+// For every output signature element that belongs to stream StreamId, copies
+// each of its columns from the associated GS temp register to the output:
+// a TempRegLoad of the temp register component followed by a StoreOutput to
+// row 0 of the element.
+void DxbcConverter::EmitGSOutputRegisterStore(unsigned StreamId) {
+  const size_t NumElements = m_pOutputSignature->m_Signature.GetElements().size();
+
+  for (size_t ElemIdx = 0; ElemIdx != NumElements; ++ElemIdx) {
+    DxilSignatureElement &SE = m_pOutputSignature->m_Signature.GetElement(ElemIdx);
+
+    // Skip elements of other streams.
+    if (SE.GetOutputStream() != StreamId)
+      continue;
+
+    DXASSERT(SE.GetRows() == 1, "to support indexable output in GS with multiple output streams");
+    unsigned TempReg = GetGSTempRegForOutputReg(SE.GetStartRow());
+
+    CompType ElemType = SE.GetCompType();
+    Type *pElemTy = ElemType.GetLLVMType(m_Ctx);
+
+    for (BYTE Col = 0; Col < SE.GetCols(); ++Col) {
+      // Register component that backs this column of the element.
+      BYTE RegComp = SE.GetStartCol() + Col;
+
+      // 1. Load value from the corresponding temp reg.
+      Value *LoadArgs[2] = {
+        m_pOP->GetU32Const((unsigned)OP::OpCode::TempRegLoad),  // OpCode
+        m_pOP->GetU32Const(DXBC::GetRegIndex(TempReg, RegComp)) // Linearized register index
+      };
+      Function *pLoadFn = m_pOP->GetOpFunc(OP::OpCode::TempRegLoad, pElemTy);
+      Value *pLoaded = m_pBuilder->CreateCall(pLoadFn, LoadArgs);
+
+      // 2. Store the value to the output reg.
+      Value *StoreArgs[5] = {
+        m_pOP->GetU32Const((unsigned)OP::OpCode::StoreOutput),  // OpCode
+        m_pOP->GetU32Const(SE.GetID()),                         // Output signature element ID
+        m_pOP->GetU32Const(0),                                  // Row, relative to the element
+        m_pOP->GetU8Const(Col),                                 // Col, relative to the element
+        pLoaded                                                 // Value
+      };
+      Function *pStoreFn = m_pOP->GetOpFunc(OP::OpCode::StoreOutput, pElemTy);
+      m_pBuilder->CreateCall(pStoreFn, StoreArgs);
+    }
+  }
+}
+
+// Emits a CreateHandle call for entry pIndex of resource range RangeID of the
+// given resource class and returns the call. pIndex must be an i32 value;
+// bNonUniformIndex is passed through as the call's last (i1) argument.
+Value *DxbcConverter::CreateHandle(DxilResourceBase::Class Class, unsigned RangeID,
+                                   Value *pIndex, bool bNonUniformIndex) {
+  DXASSERT(pIndex->getType() == Type::getInt32Ty(m_Ctx), "index should be i32 type");
+
+  const OP::OpCode HandleOp = OP::OpCode::CreateHandle;
+  Value *CallArgs[5] = {
+    m_pOP->GetU32Const((unsigned)HandleOp),   // OpCode
+    m_pOP->GetU8Const((BYTE)Class),           // Resource class (SRV, UAV, CBuffer, Sampler)
+    m_pOP->GetU32Const(RangeID),              // Range ID
+    pIndex,                                   // 0-based index into the range
+    m_pOP->GetI1Const(bNonUniformIndex)       // Non-uniform resource index
+  };
+
+  Function *pHandleFn = m_pOP->GetOpFunc(HandleOp, Type::getVoidTy(m_Ctx));
+  return m_pBuilder->CreateCall(pHandleFn, CallArgs);
+}
+
+// Returns a value that represents the float constant fVal.
+// The constant is round-tripped through APFloat first: if the bit pattern
+// survives unchanged, a regular float constant is returned; otherwise the
+// original 32 bits are passed as an integer constant to a BitcastI32toF32
+// call, so the exact bit pattern is kept.
+Value *DxbcConverter::LoadConstFloat(float& fVal) {
+  const unsigned OriginalBits = *(unsigned *)&fVal;
+
+  APFloat AsAPFloat(fVal);
+  float RoundTripped = AsAPFloat.convertToFloat();
+  const unsigned RoundTrippedBits = *(unsigned *)&RoundTripped;
+
+  if (RoundTrippedBits != OriginalBits) {
+    // Bit pattern changed: materialize the value from its integer bits.
+    const OP::OpCode BitcastOp = OP::OpCode::BitcastI32toF32;
+    Value *CallArgs[2] = {
+      m_pOP->GetU32Const((unsigned)BitcastOp),  // OpCode
+      m_pOP->GetU32Const(OriginalBits)          // Input
+    };
+    Function *pBitcastFn = m_pOP->GetOpFunc(BitcastOp, Type::getVoidTy(m_Ctx));
+    return m_pBuilder->CreateCall(pBitcastFn, CallArgs);
+  }
+
+  return m_pOP->GetFloatConst(fVal);
+}
+
+void DxbcConverter::LoadOperand(OperandValue &SrcVal,
+                                D3D10ShaderBinary::CInstruction &Inst,
+                                const unsigned OpIdx,
+                                const CMask &Mask,
+                                const CompType &ValueType) {
+  D3D10ShaderBinary::COperandBase &O = Inst.m_Operands[OpIdx];
+
+  switch (O.m_Type) {
+  case D3D10_SB_OPERAND_TYPE_IMMEDIATE32:
+    DXASSERT_DXBC(O.m_Modifier == D3D10_SB_OPERAND_MODIFIER_NONE);
+    for (BYTE c = 0; c < DXBC::kWidth; c++) {
+      if (!Mask.IsSet(c)) continue;
+
+      bool bVec4 = O.m_NumComponents == D3D10_SB_OPERAND_4_COMPONENT;
+      BYTE Comp =  bVec4 ? c : 0;
+
+      switch (ValueType.GetKind()) {
+      case CompType::Kind::F32:
+        SrcVal[c] = LoadConstFloat(O.m_Valuef[Comp]);
+        break;
+
+      case CompType::Kind::F16:
+        SrcVal[c] = CastDxbcValue(LoadConstFloat(O.m_Valuef[Comp]), CompType::Kind::F32, CompType::Kind::F16);
+        break;
+
+      case CompType::Kind::I32: __fallthrough;
+      case CompType::Kind::U32:
+        SrcVal[c] = m_pOP->GetU32Const(O.m_Value[Comp]);
+        break;
+
+      case CompType::Kind::I16: __fallthrough;
+      case CompType::Kind::U16:
+        SrcVal[c] = CastDxbcValue(m_pOP->GetU32Const(O.m_Value[Comp]), CompType::Kind::U32, CompType::Kind::I16);
+        break;
+
+      case CompType::Kind::I1:
+        SrcVal[c] = CastDxbcValue(m_pOP->GetU32Const(O.m_Value[Comp]), CompType::Kind::U32, CompType::Kind::I1);
+        break;
+
+      default:
+        DXASSERT_DXBC(false);
+      }
+    }
+    break;
+
+  case D3D10_SB_OPERAND_TYPE_IMMEDIATE64:
+    DXASSERT_NOMSG(ValueType.GetKind() == CompType::Kind::F64);
+    for (BYTE c = 0; c < DXBC::kWidth; c += 2) {
+      if (!Mask.IsSet(c)) continue;
+
+      SrcVal[c] = m_pOP->GetDoubleConst(O.m_Valued[c]);
+    }
+    break;
+
+  case D3D10_SB_OPERAND_TYPE_TEMP: {
+    DXASSERT_DXBC(O.m_IndexDimension == D3D10_SB_OPERAND_INDEX_1D);
+    unsigned Reg = O.m_Index[0].m_RegIndex;
+    CompType DxbcValueType = DXBC::GetCompTypeFromMinPrec(O.m_MinPrecision, ValueType);
+    if (DxbcValueType.IsBoolTy()) {
+      DxbcValueType = CompType::getI32();
+    }
+    Type *pDxbcValueType = DxbcValueType.GetLLVMType(m_Ctx);
+
+    if (DxbcValueType.GetKind() != CompType::Kind::F64)
+    {
+      for (OperandValueHelper OVH(SrcVal, Mask, O); !OVH.IsDone(); OVH.Advance()) {
+        BYTE Comp = OVH.GetComp();
+
+        Value *Args[2];
+        Args[0] = m_pOP->GetU32Const((unsigned)OP::OpCode::TempRegLoad);    // OpCode
+        Args[1] = m_pOP->GetU32Const(DXBC::GetRegIndex(Reg, Comp));         // Linearized register index
+        Function *F = m_pOP->GetOpFunc(OP::OpCode::TempRegLoad, pDxbcValueType);
+        Value *pValue = m_pBuilder->CreateCall(F, Args);
+
+        pValue = CastDxbcValue(pValue, DxbcValueType, ValueType);
+        pValue = ApplyOperandModifiers(pValue, O);
+
+        OVH.SetValue(pValue);
+      }
+    } else {
+      DXASSERT_DXBC(CMask::IsValidDoubleMask(Mask));
+      for (OperandValueHelper OVH(SrcVal, Mask, O); !OVH.IsDone(); OVH.Advance()) {
+        BYTE Comp = OVH.GetComp();
+
+        Value *pValue1, *pValue2;
+        {
+          Value *Args[2];
+          Args[0] = m_pOP->GetU32Const((unsigned)OP::OpCode::TempRegLoad);  // OpCode
+          Args[1] = m_pOP->GetU32Const(DXBC::GetRegIndex(Reg, Comp));       // Linearized register index1
+          Function *F = m_pOP->GetOpFunc(OP::OpCode::TempRegLoad, CompType::getU32().GetLLVMType(m_Ctx));
+          pValue1 = m_pBuilder->CreateCall(F, Args);
+          Args[1] = m_pOP->GetU32Const(DXBC::GetRegIndex(Reg, Comp+1));     // Linearized register index2
+          pValue2 = m_pBuilder->CreateCall(F, Args);
+        }
+
+        Value *pValue;
+        {
+          Value *Args[3];
+          Function *F = m_pOP->GetOpFunc(OP::OpCode::MakeDouble, pDxbcValueType);
+          Args[0] = m_pOP->GetU32Const((unsigned)OP::OpCode::MakeDouble);   // OpCode
+          Args[1] = pValue1;                              // Lo part
+          Args[2] = pValue2;                              // Hi part
+          pValue = m_pBuilder->CreateCall(F, Args);
+          pValue = ApplyOperandModifiers(pValue, O);
+        }
+
+        OVH.SetValue(pValue);
+        OVH.Advance();
+      }
+    }
+
+    break;
+  }
+
+  case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP: {
+    DXASSERT_DXBC(O.m_IndexDimension == D3D10_SB_OPERAND_INDEX_2D);
+    DXASSERT_DXBC(O.m_IndexType[0] == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
+    unsigned Reg = O.m_Index[0].m_RegIndex;
+    IndexableReg &IRRec = m_IndexableRegs[Reg];
+    Value *pXRegIndex = LoadOperandIndex(O.m_Index[1], O.m_IndexType[1]);
+    Value *pRegIndex = m_pBuilder->CreateMul(pXRegIndex, m_pOP->GetI32Const(IRRec.NumComps));
+    CompType DxbcValueType = DXBC::GetCompTypeFromMinPrec(O.m_MinPrecision, ValueType);
+    if (DxbcValueType.IsBoolTy()) {
+      DxbcValueType = CompType::getI32();
+    }
+
+    if (DxbcValueType.GetKind() != CompType::Kind::F64) {
+      for (OperandValueHelper OVH(SrcVal, Mask, O); !OVH.IsDone(); OVH.Advance()) {
+        BYTE Comp = OVH.GetComp();
+        Value *pValue = nullptr;
+
+        // Create GEP.
+        Value *pIndex = m_pBuilder->CreateAdd(pRegIndex, m_pOP->GetU32Const(Comp));
+        Value *pGEPIndices[2] = { m_pOP->GetU32Const(0), pIndex };
+
+        if (!DxbcValueType.HasMinPrec()) {
+          Value *pBasePtr = m_IndexableRegs[Reg].pValue32;
+          Value *pPtr = m_pBuilder->CreateGEP(pBasePtr, pGEPIndices);
+          pValue = m_pBuilder->CreateAlignedLoad(pPtr, kRegCompAlignment);
+          pValue = CastDxbcValue(pValue, CompType::getF32(), ValueType);
+        } else {
+          // Create GEP.
+          Value *pBasePtr = m_IndexableRegs[Reg].pValue16;
+          Value *pPtr = m_pBuilder->CreateGEP(pBasePtr, pGEPIndices);
+          pValue = m_pBuilder->CreateAlignedLoad(pPtr, kRegCompAlignment/2);
+          pValue = CastDxbcValue(pValue, CompType::getF16(), ValueType);
+        }
+
+        pValue = ApplyOperandModifiers(pValue, O);
+
+        OVH.SetValue(pValue);
+      }
+    } else {
+      // Double precision.
+      for (OperandValueHelper OVH(SrcVal, Mask, O); !OVH.IsDone(); OVH.Advance()) {
+        BYTE Comp = OVH.GetComp();
+        Value *pValue = nullptr;
+
+        // Create GEP.
+        Value *pIndex = m_pBuilder->CreateAdd(pRegIndex, m_pOP->GetU32Const(Comp));
+        Value *pGEPIndices[1] = { pIndex };
+        Value *pBasePtr = m_pBuilder->CreateBitCast(m_IndexableRegs[Reg].pValue32, Type::getDoublePtrTy(m_Ctx));
+        Value *pPtr = m_pBuilder->CreateGEP(pBasePtr, pGEPIndices);
+        pValue = m_pBuilder->CreateAlignedLoad(pPtr, kRegCompAlignment*2);
+
+        pValue = ApplyOperandModifiers(pValue, O);
+
+        OVH.SetValue(pValue);
+        OVH.Advance();
+        OVH.SetValue(pValue);
+      }
+    }
+
+    break;
+  }
+
+  case D3D10_SB_OPERAND_TYPE_INPUT:
+  case D3D11_SB_OPERAND_TYPE_INPUT_CONTROL_POINT: {
+    OP::OpCode OpCode = OP::OpCode::LoadInput;
+    unsigned Register;        // Starting index of the register range.
+    Value *pUnitIndexValue;   // Vertex/point index expression.
+    Value *pRowIndexValue;    // Row index expression.
+
+    switch (O.m_IndexDimension) {
+    case D3D10_SB_OPERAND_INDEX_1D:
+      Register        = O.m_Index[0].m_RegIndex;
+      pUnitIndexValue = m_pUnusedI32;
+      pRowIndexValue  = LoadOperandIndex(O.m_Index[0], O.m_IndexType[0]);
+      break;
+
+    case D3D10_SB_OPERAND_INDEX_2D:
+      // 2D input register index: <index1, input register index>.
+      // index1: GS -- vertex index, DS -- input control point index.
+      Register        = O.m_Index[1].m_RegIndex;
+      pUnitIndexValue = LoadOperandIndex(O.m_Index[0], O.m_IndexType[0]);
+      pRowIndexValue  = LoadOperandIndex(O.m_Index[1], O.m_IndexType[1]);
+      break;
+
+    default:
+      DXASSERT(false, "there should no other index dimensions");
+    }
+
+    for (OperandValueHelper OVH(SrcVal, Mask, O); !OVH.IsDone(); OVH.Advance()) {
+      BYTE Comp = OVH.GetComp();
+      // Retrieve signature element.
+      const DxilSignatureElement *E = m_pInputSignature->GetElement(Register, Comp);
+      CompType DxbcValueType = E->GetCompType();
+      if (DxbcValueType.IsBoolTy()) {
+        DxbcValueType = CompType::getI32();
+      }
+      Type *pDxbcValueType = DxbcValueType.GetLLVMType(m_Ctx);
+
+      MutableArrayRef<Value *> Args;
+      Value *Args1[1];
+      Value *Args5[5];
+
+      if (E->GetKind() == DXIL::SemanticKind::SampleIndex) {
+        // Use SampleIndex intrinsic instead of LoadInput
+        Args = Args1;
+        OpCode = OP::OpCode::SampleIndex;
+      } else {
+        Args = Args5;
+        // Make row/col index relative within element.
+        Value *pRowIndexValueRel = m_pBuilder->CreateSub(pRowIndexValue, m_pOP->GetU32Const(E->GetStartRow()));
+        Args[1] = m_pOP->GetU32Const(E->GetID());             // Input signature element ID
+        Args[2] = pRowIndexValueRel;                          // Row, relative to the element
+        Args[3] = m_pOP->GetU8Const(Comp - E->GetStartCol()); // Col, relative to the element
+        Args[4] = pUnitIndexValue;                            // Vertex/point index
+      }
+
+      Args[0] = m_pOP->GetU32Const((unsigned)OpCode);       // OpCode
+
+      Function *F = m_pOP->GetOpFunc(OpCode, pDxbcValueType);
+      Value *pValue = m_pBuilder->CreateCall(F, Args);
+
+      pValue = CastDxbcValue(pValue, DxbcValueType, ValueType);
+      pValue = ApplyOperandModifiers(pValue, O);
+
+      OVH.SetValue(pValue);
+    }
+
+    break;
+  }
+
+  case D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER: {
+    // Upconvert operand to SM5.1.
+    if (O.m_IndexDimension == D3D10_SB_OPERAND_INDEX_2D) {
+      O.m_IndexDimension = D3D10_SB_OPERAND_INDEX_3D;
+      O.m_IndexType[2] = O.m_IndexType[1]; 
+      O.m_Index[2] = O.m_Index[1];
+      O.m_IndexType[1] = O.m_IndexType[0];
+      O.m_Index[1] = O.m_Index[0];
+    }
+
+    // Retrieve cbuffer range ID and record.
+    const DxilCBuffer* pR = m_pClassInstanceCBuffers;
+    if (O.m_IndexType[0] == D3D10_SB_OPERAND_INDEX_IMMEDIATE32) {
+      unsigned RangeID = O.m_Index[0].m_RegIndex;
+      unsigned RecIdx = m_CBufferRangeMap[RangeID];
+      pR = &m_pPR->GetCBuffer(RecIdx);
+    }
+
+    const DxilCBuffer &R = *pR;
+
+    // Setup cbuffer handle.
+    Value *pHandle = R.GetHandle();
+    if (pHandle == nullptr) {
+      // Create dynamic-index handle.
+      pHandle = CreateHandle(R.GetClass(), R.GetID(), LoadOperandIndex(O.m_Index[1], O.m_IndexType[1]), O.m_Nonuniform);
+    }
+
+    // Load values for unique components.
+    Value *pRegIndexValue = LoadOperandIndex(O.m_Index[2], O.m_IndexType[2]);
+    CompType DxbcValueType = ValueType.GetBaseCompType();
+    if (DxbcValueType.IsBoolTy()) {
+      DxbcValueType = CompType::getI32();
+    }
+    Type *pDxbcValueType = DxbcValueType.GetLLVMType(m_Ctx);
+
+    DXASSERT_NOMSG(m_bLegacyCBufferLoad);
+    Value *Args[3];
+    Args[0] = m_pOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy);  // OpCode
+    Args[1] = pHandle;                                                      // CBuffer handle
+    Args[2] = pRegIndexValue;                                               // 0-based index into cbuffer instance
+    Function *pCBufferLoadFunc = m_pOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, pDxbcValueType);
+
+    Value *pCBufferRetValue = m_pBuilder->CreateCall(pCBufferLoadFunc, Args);
+
+    if (ValueType.GetKind() != CompType::Kind::F64) {
+      for (OperandValueHelper OVH(SrcVal, Mask, O); !OVH.IsDone(); OVH.Advance()) {
+        BYTE Comp = OVH.GetComp();
+
+        Value *pValue = m_pBuilder->CreateExtractValue(pCBufferRetValue, Comp);
+        pValue = CastDxbcValue(pValue, DxbcValueType, ValueType);
+        pValue = ApplyOperandModifiers(pValue, O);
+
+        OVH.SetValue(pValue);
+      }
+    } else {
+      for (OperandValueHelper OVH(SrcVal, Mask, O); !OVH.IsDone(); OVH.Advance()) {
+        BYTE Comp = OVH.GetComp() / 2;
+
+        Value *pValue = m_pBuilder->CreateExtractValue(pCBufferRetValue, Comp);
+        pValue = CastDxbcValue(pValue, DxbcValueType, ValueType);
+        pValue = ApplyOperandModifiers(pValue, O);
+
+        OVH.SetValue(pValue);
+        OVH.Advance();
+        OVH.SetValue(pValue);
+      }
+    }
+
+    break;
+  }
+
+  case D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: {
+    DXASSERT_DXBC(O.m_IndexDimension == D3D10_SB_OPERAND_INDEX_1D);
+    Value *pRegIndex = LoadOperandIndex(O.m_Index[0], O.m_IndexType[0]);
+
+    if (ValueType.GetKind() != CompType::Kind::F64) {
+      for (OperandValueHelper OVH(SrcVal, Mask, O); !OVH.IsDone(); OVH.Advance()) {
+        BYTE Comp = OVH.GetComp();
+
+        Value *pValueIndex = m_pBuilder->CreateMul(pRegIndex, m_pOP->GetI32Const(DXBC::kWidth));
+        pValueIndex = m_pBuilder->CreateAdd(pValueIndex, m_pOP->GetI32Const(Comp));
+        // Create GEP.
+        Value *pGEPIndices[2] = { m_pOP->GetU32Const(0), pValueIndex };
+        Value *pPtr = m_pBuilder->CreateGEP(m_pIcbGV, pGEPIndices);
+        LoadInst *pLoad = m_pBuilder->CreateLoad(pPtr);
+        pLoad->setAlignment(kRegCompAlignment);
+        Value *pValue = CastDxbcValue(pLoad, CompType::getF32(), ValueType);
+        pValue = ApplyOperandModifiers(pValue, O);
+      
+        OVH.SetValue(pValue);
+      }
+    } else {
+      // Double precision ICB.
+      for (OperandValueHelper OVH(SrcVal, Mask, O); !OVH.IsDone(); OVH.Advance()) {
+        BYTE Comp = OVH.GetComp();
+
+        Value *pValueIndex = m_pBuilder->CreateMul(pRegIndex, m_pOP->GetI32Const(DXBC::kWidth));
+        pValueIndex = m_pBuilder->CreateAdd(pValueIndex, m_pOP->GetI32Const(Comp));
+        // Bitcast pointer.
+        Value *pPtrBase = m_pBuilder->CreateBitCast(m_pIcbGV, Type::getDoublePtrTy(m_Ctx));
+        // Create GEP.
+        Value *pGEPIndices[1] = { pValueIndex };
+        Value *pPtr = m_pBuilder->CreateGEP(pPtrBase, pGEPIndices);
+        LoadInst *pLoad = m_pBuilder->CreateLoad(pPtr);
+        pLoad->setAlignment(kRegCompAlignment*2);
+        Value *pValue = pLoad;
+        pValue = ApplyOperandModifiers(pValue, O);
+      
+        OVH.SetValue(pValue);
+        OVH.Advance();
+        OVH.SetValue(pValue);
+      }
+    }
+    break;
+  }
+
+  case D3D10_SB_OPERAND_TYPE_SAMPLER: {
+    // Upconvert operand to SM5.1.
+    if (O.m_IndexDimension == D3D10_SB_OPERAND_INDEX_1D) {
+      O.m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D;
+      O.m_IndexType[1] = O.m_IndexType[0];
+      O.m_Index[1] = O.m_Index[0];
+    }
+
+    // Retrieve sampler range ID and record.
+    const DxilSampler* pR = nullptr;
+    if (O.m_IndexType[0] == D3D10_SB_OPERAND_INDEX_IMMEDIATE32) {
+      unsigned RangeID = O.m_Index[0].m_RegIndex;
+      unsigned RecIdx = m_SamplerRangeMap[RangeID];
+      pR = &m_pPR->GetSampler(RecIdx);
+    }
+    else {
+      switch (Inst.OpCode()) {
+      case D3D10_SB_OPCODE_SAMPLE_C:
+      case D3D10_SB_OPCODE_SAMPLE_C_LZ:
+      case D3DWDDM1_3_SB_OPCODE_SAMPLE_C_CLAMP_FEEDBACK:
+      case D3DWDDM1_3_SB_OPCODE_SAMPLE_C_LZ_FEEDBACK:
+      case D3D11_SB_OPCODE_GATHER4_PO_C:
+      case D3DWDDM1_3_SB_OPCODE_GATHER4_PO_C_FEEDBACK:
+        pR = m_pClassInstanceComparisonSamplers;
+        break;
+      default:
+        pR = m_pClassInstanceSamplers;
+        break;
+      }
+    }
+    const DxilSampler &R = *pR;
+
+    // Setup sampler handle.
+    Value *pHandle = R.GetHandle();
+    if (pHandle == nullptr) {
+      // Create dynamic-index handle.
+      pHandle = CreateHandle(R.GetClass(), R.GetID(), LoadOperandIndex(O.m_Index[1], O.m_IndexType[1]), O.m_Nonuniform);
+    }
+
+    // Replicate handle values.
+    for (BYTE c = 0; c < DXBC::kWidth; c++) {
+      if (Mask.IsSet(c))
+        SrcVal[c] = pHandle;
+    }
+
+    break;
+  }
+
+  case D3D10_SB_OPERAND_TYPE_RESOURCE: {
+    (void)LoadSRVOperand(SrcVal, Inst, OpIdx, Mask, ValueType);
+    break;
+  }
+
+  case D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW: {
+    // Upconvert operand to SM5.1.
+    if (O.m_IndexDimension == D3D10_SB_OPERAND_INDEX_1D) {
+      DXASSERT_DXBC(O.m_IndexType[0] == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
+      O.m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D;
+      O.m_IndexType[1] = O.m_IndexType[0];
+      O.m_Index[1] = O.m_Index[0];
+    }
+
+    // Retrieve UAV range ID and record.
+    DXASSERT_DXBC(O.m_IndexType[0] == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
+    unsigned RangeID = O.m_Index[0].m_RegIndex;
+    unsigned RecIdx = m_UAVRangeMap[RangeID];
+    const DxilResource &R = m_pPR->GetUAV(RecIdx);
+
+    // Setup UAV handle.
+    Value *pHandle = R.GetHandle();
+    if (pHandle == nullptr) {
+      DXASSERT(IsSM51Plus(), "otherwise did not initialize handles on entry to main");
+      // Create dynamic-index handle.
+      pHandle = CreateHandle(R.GetClass(), R.GetID(), LoadOperandIndex(O.m_Index[1], O.m_IndexType[1]), O.m_Nonuniform);
+    }
+
+    // Replicate handle values.
+    for (BYTE c = 0; c < DXBC::kWidth; c++) {
+      if (Mask.IsSet(c))
+        SrcVal[c] = pHandle;
+    }
+
+    break;
+  }
+
+  case D3D10_SB_OPERAND_TYPE_RASTERIZER: {
+    DXASSERT_DXBC(O.m_IndexDimension == D3D10_SB_OPERAND_INDEX_0D);
+    DXASSERT_DXBC(false);   // "rasterizer" register is not used in DXIL.
+    break;
+  }
+
+  case D3D11_SB_OPERAND_TYPE_INPUT_THREAD_ID:
+  case D3D11_SB_OPERAND_TYPE_INPUT_THREAD_GROUP_ID:
+  case D3D11_SB_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP: {
+    OP::OpCode OpCode;
+    switch (O.m_Type) {
+    case D3D11_SB_OPERAND_TYPE_INPUT_THREAD_ID:           OpCode = OP::OpCode::ThreadId; break;
+    case D3D11_SB_OPERAND_TYPE_INPUT_THREAD_GROUP_ID:     OpCode = OP::OpCode::GroupId; break;
+    case D3D11_SB_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP:  OpCode = OP::OpCode::ThreadIdInGroup; break;
+    }
+    CompType DxbcValueType = CompType::Kind::I32;
+    Type *pDxbcValueType = DxbcValueType.GetLLVMType(m_Ctx);
+    Function *F = m_pOP->GetOpFunc(OpCode, pDxbcValueType);
+
+    for (OperandValueHelper OVH(SrcVal, Mask, O); !OVH.IsDone(); OVH.Advance()) {
+      BYTE Comp = OVH.GetComp();
+
+      Value *Args[2];
+      Args[0] = m_pOP->GetU32Const((unsigned)OpCode);       // OpCode
+      Args[1] = m_pOP->GetU32Const(Comp);         // Component: x,y,z
+      Value *pValue = m_pBuilder->CreateCall(F, Args);
+
+      pValue = CastDxbcValue(pValue, DxbcValueType, ValueType);
+      pValue = ApplyOperandModifiers(pValue, O);
+
+      OVH.SetValue(pValue);
+    }
+    break;
+  }
+
+  case D3D11_SB_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED: {
+    OP::OpCode OpCode = OP::OpCode::FlattenedThreadIdInGroup;
+    CompType DxbcValueType = CompType::Kind::I32;
+    Type *pDxbcValueType = DxbcValueType.GetLLVMType(m_Ctx);
+    Function *F = m_pOP->GetOpFunc(OpCode, pDxbcValueType);
+
+    for (OperandValueHelper OVH(SrcVal, Mask, O); !OVH.IsDone(); OVH.Advance()) {
+      Value *Args[1];
+      Args[0] = m_pOP->GetU32Const((unsigned)OpCode);       // OpCode
+      Value *pValue = m_pBuilder->CreateCall(F, Args);
+
+      pValue = CastDxbcValue(pValue, DxbcValueType, ValueType);
+      pValue = ApplyOperandModifiers(pValue, O);
+
+      OVH.SetValue(pValue);
+    }
+    break;
+  }
+
+  case D3D11_SB_OPERAND_TYPE_INPUT_PATCH_CONSTANT: {
+    DXASSERT_DXBC(O.m_IndexDimension == D3D10_SB_OPERAND_INDEX_1D);
+    unsigned Register     = O.m_Index[0].m_RegIndex;
+    Value *pRowIndexValue = LoadOperandIndex(O.m_Index[0], O.m_IndexType[0]);
+
+    for (OperandValueHelper OVH(SrcVal, Mask, O); !OVH.IsDone(); OVH.Advance()) {
+      BYTE Comp = OVH.GetComp();
+      // Retrieve signature element.
+      const DxilSignatureElement *E = m_pPatchConstantSignature->GetElement(Register, Comp);
+      CompType DxbcValueType = E->GetCompType();
+      if (DxbcValueType.IsBoolTy()) {
+        DxbcValueType = CompType::getI32();
+      }
+      Type *pDxbcValueType = DxbcValueType.GetLLVMType(m_Ctx);
+
+      // Make row/col index relative within element.
+      Value *pRowIndexValueRel = m_pBuilder->CreateSub(pRowIndexValue, m_pOP->GetU32Const(E->GetStartRow()));
+
+      Value *Args[4];
+      Args[0] = m_pOP->GetU32Const((unsigned)OP::OpCode::LoadPatchConstant);  // OpCode
+      Args[1] = m_pOP->GetU32Const(E->GetID());                               // Patch constant signature element ID
+      Args[2] = pRowIndexValueRel;                                            // Row, relative to the element
+      Args[3] = m_pOP->GetU8Const(Comp - E->GetStartCol());                   // Col, relative to the element
+      Function *F = m_pOP->GetOpFunc(OP::OpCode::LoadPatchConstant, pDxbcValueType);
+      Value *pValue = m_pBuilder->CreateCall(F, Args);
+
+      pValue = CastDxbcValue(pValue, DxbcValueType, ValueType);
+      pValue = ApplyOperandModifiers(pValue, O);
+
+      OVH.SetValue(pValue);
+    }
+
+    break;
+  }
+
+  case D3D11_SB_OPERAND_TYPE_OUTPUT_CONTROL_POINT: {
+    DXASSERT_DXBC(O.m_IndexDimension == D3D10_SB_OPERAND_INDEX_2D);
+    OP::OpCode OpCode = OP::OpCode::LoadOutputControlPoint;
+    unsigned Register      = O.m_Index[1].m_RegIndex;                           // Starting index of the register range.
+    Value *pUnitIndexValue = LoadOperandIndex(O.m_Index[0], O.m_IndexType[0]);  // Vertex/point index expression.
+    Value *pRowIndexValue  = LoadOperandIndex(O.m_Index[1], O.m_IndexType[1]);  // Row index expression.
+
+    for (OperandValueHelper OVH(SrcVal, Mask, O); !OVH.IsDone(); OVH.Advance()) {
+      BYTE Comp = OVH.GetComp();
+      // Retrieve signature element.
+      const DxilSignatureElement *E = m_pOutputSignature->GetElement(Register, Comp);
+      CompType DxbcValueType = E->GetCompType();
+      if (DxbcValueType.IsBoolTy()) {
+        DxbcValueType = CompType::getI32();
+      }
+      Type *pDxbcValueType = DxbcValueType.GetLLVMType(m_Ctx);
+
+      // Make row/col index relative within element.
+      Value *pRowIndexValueRel = m_pBuilder->CreateSub(pRowIndexValue, m_pOP->GetU32Const(E->GetStartRow()));
+
+      Value *Args[5];
+      Args[0] = m_pOP->GetU32Const((unsigned)OpCode);         // OpCode
+      Args[1] = m_pOP->GetU32Const(E->GetID());               // Output signature element ID
+      Args[2] = pRowIndexValueRel;                            // Row, relative to the element
+      Args[3] = m_pOP->GetU8Const(Comp - E->GetStartCol());   // Col, relative to the element
+      Args[4] = pUnitIndexValue;                              // Vertex/point index
+      Function *F = m_pOP->GetOpFunc(OpCode, pDxbcValueType);
+      Value *pValue = m_pBuilder->CreateCall(F, Args);
+
+      pValue = CastDxbcValue(pValue, DxbcValueType, ValueType);
+      pValue = ApplyOperandModifiers(pValue, O);
+
+      OVH.SetValue(pValue);
+    }
+
+    break;
+  }
+
+  case D3D11_SB_OPERAND_TYPE_INPUT_DOMAIN_POINT: {
+    OP::OpCode OpCode = OP::OpCode::DomainLocation;
+    CompType DxbcValueType = CompType::Kind::F32;
+    Type *pDxbcValueType = DxbcValueType.GetLLVMType(m_Ctx);
+    Function *F = m_pOP->GetOpFunc(OpCode, pDxbcValueType);
+
+    for (OperandValueHelper OVH(SrcVal, Mask, O); !OVH.IsDone(); OVH.Advance()) {
+      BYTE Comp = OVH.GetComp();
+      Value *Args[2];
+      Args[0] = m_pOP->GetU32Const((unsigned)OpCode);   // OpCode
+      Args[1] = m_pOP->GetU8Const(Comp);                // Component
+      Value *pValue = m_pBuilder->CreateCall(F, Args);
+
+      pValue = CastDxbcValue(pValue, DxbcValueType, ValueType);
+      pValue = ApplyOperandModifiers(pValue, O);
+
+      OVH.SetValue(pValue);
+    }
+
+    break;
+  }
+
+  case D3D11_SB_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID:
+  case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID:
+  case D3D11_SB_OPERAND_TYPE_INPUT_GS_INSTANCE_ID:
+  case D3D11_SB_OPERAND_TYPE_INPUT_COVERAGE_MASK:
+  case D3D11_SB_OPERAND_TYPE_INNER_COVERAGE: {
+    OP::OpCode OpCode;
+    switch (O.m_Type) {
+    case D3D11_SB_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID: OpCode = OP::OpCode::OutputControlPointID; break;
+    case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID:       OpCode = OP::OpCode::PrimitiveID; break;
+    case D3D11_SB_OPERAND_TYPE_INPUT_GS_INSTANCE_ID:    OpCode = OP::OpCode::GSInstanceID; break;
+    case D3D11_SB_OPERAND_TYPE_INPUT_COVERAGE_MASK:     OpCode = OP::OpCode::Coverage; break;
+    case D3D11_SB_OPERAND_TYPE_INNER_COVERAGE:          OpCode = OP::OpCode::InnerCoverage; break;
+    }
+    CompType DxbcValueType = CompType::Kind::I32;
+    Type *pDxbcValueType = DxbcValueType.GetLLVMType(m_Ctx);
+    Function *F = m_pOP->GetOpFunc(OpCode, pDxbcValueType);
+
+    Value *Args[1];
+    Args[0] = m_pOP->GetU32Const((unsigned)OpCode);   // OpCode
+    Value *pValue = m_pBuilder->CreateCall(F, Args);
+
+    pValue = CastDxbcValue(pValue, DxbcValueType, ValueType);
+    pValue = ApplyOperandModifiers(pValue, O);
+
+    for (OperandValueHelper OVH(SrcVal, Mask, O); !OVH.IsDone(); OVH.Advance()) {
+      OVH.SetValue(pValue);
+    }
+    break;
+  }
+
+  case D3D11_SB_OPERAND_TYPE_CYCLE_COUNTER: {
+    OP::OpCode OpCode = OP::OpCode::CycleCounterLegacy;
+    Function *F = m_pOP->GetOpFunc(OpCode, Type::getVoidTy(m_Ctx));
+
+    Value *Args[1];
+    Args[0] = m_pOP->GetU32Const((unsigned)OpCode);   // OpCode
+    Value *pValue = m_pBuilder->CreateCall(F, Args);
+
+    for (OperandValueHelper OVH(SrcVal, Mask, O); !OVH.IsDone(); OVH.Advance()) {
+      BYTE c = OVH.GetComp();
+      switch (c) {
+      case 0: {
+        Value *pLo32 = m_pBuilder->CreateExtractValue(pValue, 0);
+        pLo32 = CastDxbcValue(pLo32, CompType::Kind::I32, ValueType);
+        OVH.SetValue(pLo32);
+        break;
+      }
+      case 1: {
+        Value *pHi32 = m_pBuilder->CreateExtractValue(pValue, 1);
+        pHi32 = CastDxbcValue(pHi32, CompType::Kind::I32, ValueType);
+        OVH.SetValue(pHi32);
+        break;
+      }
+      default:
+        OVH.SetValue(m_pOP->GetU32Const(0));
+      }
+    }
+    break;
+  }
+
+  case D3D11_SB_OPERAND_TYPE_INPUT_FORK_INSTANCE_ID:
+  case D3D11_SB_OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID: {
+    Scope &HullScope = m_ScopeStack.FindParentHullLoop();
+    Value *pValue = m_pBuilder->CreateLoad(HullScope.pInductionVar);
+
+    for (OperandValueHelper OVH(SrcVal, Mask, O); !OVH.IsDone(); OVH.Advance()) {
+      OVH.SetValue(pValue);
+    }
+
+    break;
+  }
+
+  case D3D11_SB_OPERAND_TYPE_THIS_POINTER: {
+    Value *pIfaceIdx = LoadOperandIndex(O.m_Index[0], O.m_IndexType[0]);
+    // The CBuffer layout here is a UINT for the interface class type selection, then 3 UINTs padding, per interface.
+    // After that, there's another 4 UINTs per interface which defines the "this" pointer data.
+    // Note, legacy CBuffer loads address their data in number of 4-float constants, not bytes or single elements.
+    // Since the "this" data comes after 4 UINTs per interface, adjust the CB offset just by the number of interfaces.
+    Value* pCBOffset = m_pBuilder->CreateAdd(m_pOP->GetU32Const(m_NumIfaces), pIfaceIdx);
+
+    Value *Args[3];
+    Args[0] = m_pOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy);  // OpCode
+    Args[1] = CreateHandle(m_pInterfaceDataBuffer->GetClass(),
+                           m_pInterfaceDataBuffer->GetID(),
+                           m_pOP->GetU32Const(m_pInterfaceDataBuffer->GetLowerBound()),
+                           false /*Nonuniform*/);                           // CBuffer handle
+    Args[2] = pCBOffset;                                                    // 0-based index into cbuffer instance
+    Function *pCBufferLoadFunc = m_pOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, Type::getInt32Ty(m_Ctx));
+    Value* pCBufferRetValue = m_pBuilder->CreateCall(pCBufferLoadFunc, Args);
+
+    for (OperandValueHelper OVH(SrcVal, Mask, O); !OVH.IsDone(); OVH.Advance()) {
+      BYTE Comp = OVH.GetComp();
+
+      Value *pValue = m_pBuilder->CreateExtractValue(pCBufferRetValue, Comp);
+      pValue = CastDxbcValue(pValue, CompType::Kind::I32, ValueType);
+      pValue = ApplyOperandModifiers(pValue, O);
+
+      OVH.SetValue(pValue);
+    }
+    break;
+  }
+
+  default:
+    DXASSERT_ARGS(false, "Operand type %u is not yet implemented", O.m_Type);
+  }
+}
+
+// Resolves the SRV referenced by operand OpIdx of Inst and writes its handle
+// into every component of SrcVal that is selected by Mask. When the resource
+// record carries no handle, one is created from the operand's second index.
+// Returns the resource record obtained from GetSRVFromOperand.
+const DxilResource& DxbcConverter::LoadSRVOperand(OperandValue &SrcVal,
+                                                  D3D10ShaderBinary::CInstruction &Inst,
+                                                  const unsigned OpIdx,
+                                                  const CMask &Mask,
+                                                  const CompType &ValueType) {
+  D3D10ShaderBinary::COperandBase &O = Inst.m_Operands[OpIdx];
+  DXASSERT(O.m_Type == D3D10_SB_OPERAND_TYPE_RESOURCE, "LoadSRVOperand should only be called for SRV operands.");
+  const DxilResource &Srv = GetSRVFromOperand(Inst, OpIdx);
+
+  // Prefer the handle stored on the record; fall back to a dynamically
+  // indexed handle. O is read only after GetSRVFromOperand, which may have
+  // rewritten the operand's indices.
+  Value *pSrvHandle = Srv.GetHandle();
+  if (!pSrvHandle) {
+    Value *pRangeIndex = LoadOperandIndex(O.m_Index[1], O.m_IndexType[1]);
+    pSrvHandle = CreateHandle(Srv.GetClass(), Srv.GetID(), pRangeIndex, O.m_Nonuniform);
+  }
+
+  // Broadcast the handle to each requested component.
+  for (BYTE Comp = 0; Comp < DXBC::kWidth; ++Comp) {
+    if (!Mask.IsSet(Comp))
+      continue;
+    SrcVal[Comp] = pSrvHandle;
+  }
+
+  return Srv;
+}
+
+// Returns the SRV resource record referenced by operand OpIdx of Inst.
+// A 1D-indexed operand is first rewritten in place to the 2D (SM5.1) form by
+// duplicating index 0 into index 1. An immediate first index is looked up in
+// m_SRVRangeMap; any other index type is resolved via GetInterfacesSRVDecl.
+const DxilResource& DxbcConverter::GetSRVFromOperand(D3D10ShaderBinary::CInstruction &Inst,
+                                                     const unsigned OpIdx) {
+  D3D10ShaderBinary::COperandBase &O = Inst.m_Operands[OpIdx];
+  DXASSERT(O.m_Type == D3D10_SB_OPERAND_TYPE_RESOURCE, "GetSRVFromOperand should only be called for SRV operands.");
+
+  // Upconvert operand to SM5.1.
+  const bool bNeedsUpconvert = (O.m_IndexDimension == D3D10_SB_OPERAND_INDEX_1D);
+  if (bNeedsUpconvert) {
+    O.m_Index[1] = O.m_Index[0];
+    O.m_IndexType[1] = O.m_IndexType[0];
+    O.m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D;
+  }
+
+  // Non-immediate range index: resolve through the interfaces declaration.
+  if (O.m_IndexType[0] != D3D10_SB_OPERAND_INDEX_IMMEDIATE32)
+    return GetInterfacesSRVDecl(Inst);
+
+  // Immediate range ID: map it to the SRV record index.
+  const unsigned SrvRangeID = O.m_Index[0].m_RegIndex;
+  const unsigned SrvRecIdx = m_SRVRangeMap[SrvRangeID];
+  return m_pPR->GetSRV(SrvRecIdx);
+}
+
+void DxbcConverter::StoreOperand(OperandValue &DstVal,                        // Per-component values to store; updated in place below.
+                                 const D3D10ShaderBinary::CInstruction &Inst, // Instruction that owns the destination operand.
+                                 const unsigned OpIdx,                        // Index of the destination operand in Inst.m_Operands.
+                                 const CMask &Mask,                           // Components that are actually written.
+                                 const CompType &ValueType) {                 // Component type of the values held in DstVal.
+  const D3D10ShaderBinary::COperandBase &O = Inst.m_Operands[OpIdx];
+  // Stores the masked components of DstVal into destination operand O; the store sequence emitted depends on O.m_Type.
+  // Mark value as precise, if needed. Null components are left untouched.
+  for (BYTE c = 0; c < DXBC::kWidth; c++) {
+    Value *pValue = DstVal[c];
+    if (pValue != nullptr)
+      DstVal[c] = MarkPrecise(DstVal[c], c);
+  }
+
+  ApplyInstructionModifiers(DstVal, Inst);  // Runs after the precise marking above and before any store is emitted.
+
+  switch (O.m_Type) {
+  case D3D10_SB_OPERAND_TYPE_TEMP: {
+    DXASSERT_DXBC(O.m_IndexDimension == D3D10_SB_OPERAND_INDEX_1D);
+    unsigned Reg = O.m_Index[0].m_RegIndex;
+    CompType DxbcValueType = DXBC::GetCompTypeFromMinPrec(O.m_MinPrecision, ValueType);
+    if (DxbcValueType.IsBoolTy()) {
+      DxbcValueType = CompType::getI32();  // Bool values are stored as I32.
+    }
+    Type *pDxbcValueType = DxbcValueType.GetLLVMType(m_Ctx);
+
+    if (DxbcValueType.GetKind() != CompType::Kind::F64) {
+      for (BYTE c = 0; c < DXBC::kWidth; c++) {  // One TempRegStore call per masked component.
+        if (!Mask.IsSet(c)) continue;
+
+        Value *Args[3];
+        Args[0] = m_pOP->GetU32Const((unsigned)OP::OpCode::TempRegStore); // OpCode
+        Args[1] = m_pOP->GetU32Const(DXBC::GetRegIndex(Reg, c));          // Linearized register index
+        Args[2] = MarkPrecise(CastDxbcValue(DstVal[c], ValueType, DxbcValueType), c); // Value
+        Function *F = m_pOP->GetOpFunc(OP::OpCode::TempRegStore, pDxbcValueType);
+        MarkPrecise(m_pBuilder->CreateCall(F, Args));
+      }
+    } else {
+      for (BYTE c = 0; c < DXBC::kWidth; c += 2) {  // A double spans components c and c+1; only Mask bit c is tested.
+        if (!Mask.IsSet(c)) continue;
+
+        Value *pSDT;  // Split double type.
+        {
+          Value *Args[2];
+          Args[0] = m_pOP->GetU32Const((unsigned)OP::OpCode::SplitDouble);  // OpCode
+          Args[1] = DstVal[c];                                              // Double value
+          Function *F = m_pOP->GetOpFunc(OP::OpCode::SplitDouble, pDxbcValueType);
+          pSDT = MarkPrecise(m_pBuilder->CreateCall(F, Args), c);
+        }
+
+        Value *Args[3];
+        Args[0] = m_pOP->GetU32Const((unsigned)OP::OpCode::TempRegStore);   // OpCode
+        Args[1] = m_pOP->GetU32Const(DXBC::GetRegIndex(Reg, c));            // Linearized register index 1
+        Args[2] = MarkPrecise(m_pBuilder->CreateExtractValue(pSDT, 0), c);  // Value to store
+        Function *F = m_pOP->GetOpFunc(OP::OpCode::TempRegStore, Type::getInt32Ty(m_Ctx));
+        Value *pVal = m_pBuilder->CreateCall(F, Args);
+        MarkPrecise(pVal, c);
+        Args[1] = m_pOP->GetU32Const(DXBC::GetRegIndex(Reg, c+1));          // Linearized register index 2
+        Args[2] = MarkPrecise(m_pBuilder->CreateExtractValue(pSDT, 1), c+1);// Value to store
+        MarkPrecise(m_pBuilder->CreateCall(F, Args));
+      }
+    }
+
+    break;
+  }
+
+  case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP: {
+    DXASSERT_DXBC(O.m_IndexDimension == D3D10_SB_OPERAND_INDEX_2D);
+    DXASSERT_DXBC(O.m_IndexType[0] == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
+    unsigned Reg = O.m_Index[0].m_RegIndex;
+    IndexableReg &IRRec = m_IndexableRegs[Reg];
+    Value *pXRegIndex = LoadOperandIndex(O.m_Index[1], O.m_IndexType[1]);  // Second operand index; may be a dynamic expression.
+    Value *pRegIndex = m_pBuilder->CreateMul(pXRegIndex, m_pOP->GetI32Const(IRRec.NumComps));  // Offset of the addressed register's first component.
+    CompType DxbcValueType = DXBC::GetCompTypeFromMinPrec(O.m_MinPrecision, ValueType);
+    if (DxbcValueType.IsBoolTy()) {
+      DxbcValueType = CompType::getI32();
+    }
+
+    if (DxbcValueType.GetKind() != CompType::Kind::F64) {
+      for (BYTE c = 0; c < DXBC::kWidth; c++) {
+        if (!Mask.IsSet(c)) continue;
+
+        // Create GEP.
+        Value *pIndex = m_pBuilder->CreateAdd(pRegIndex, m_pOP->GetU32Const(c));
+        Value *pGEPIndices[2] = { m_pOP->GetU32Const(0), pIndex };
+        if (!DxbcValueType.HasMinPrec()) {  // Full precision: cast to F32 and store through pValue32.
+          Value *pBasePtr = m_IndexableRegs[Reg].pValue32;
+          Value *pPtr = m_pBuilder->CreateGEP(pBasePtr, pGEPIndices);
+          Value *pValue = MarkPrecise(CastDxbcValue(DstVal[c], ValueType, CompType::getF32()), c);
+          MarkPrecise(m_pBuilder->CreateAlignedStore(pValue, pPtr, kRegCompAlignment), c);
+        } else {  // Min precision: cast to F16 and store through pValue16 with half the alignment.
+          Value *pBasePtr = m_IndexableRegs[Reg].pValue16;
+          Value *pPtr = m_pBuilder->CreateGEP(pBasePtr, pGEPIndices);
+          Value *pValue = MarkPrecise(CastDxbcValue(DstVal[c], ValueType, CompType::getF16()), c);
+          MarkPrecise(m_pBuilder->CreateAlignedStore(pValue, pPtr, kRegCompAlignment/2), c);
+        }
+      }
+    } else {
+      // Double precision. The pValue32 pointer is bitcast to double* and DstVal[c] is stored directly.
+      for (BYTE c = 0; c < DXBC::kWidth; c += 2) {
+        if (!Mask.IsSet(c)) continue;
+
+        // Create GEP. NOTE(review): pIndex is computed in component units but indexes a double* here -- confirm the intended scaling.
+        Value *pIndex = m_pBuilder->CreateAdd(pRegIndex, m_pOP->GetU32Const(c));
+        Value *pGEPIndices[] = { pIndex };
+        Value *pBasePtr = m_pBuilder->CreateBitCast(m_IndexableRegs[Reg].pValue32, Type::getDoublePtrTy(m_Ctx));
+        Value *pPtr = m_pBuilder->CreateGEP(pBasePtr, pGEPIndices);
+        MarkPrecise(m_pBuilder->CreateAlignedStore(DstVal[c], pPtr, kRegCompAlignment*2));
+      }
+    }
+    break;
+  }
+
+  case D3D10_SB_OPERAND_TYPE_OUTPUT: {
+    unsigned Reg = O.m_Index[0].m_RegIndex;
+    // Row index expression.
+    Value *pRowIndexValue = LoadOperandIndex(O.m_Index[0], O.m_IndexType[0]);
+
+    bool bStoreOutputReg = !(m_pSM->IsGS() && m_pPR->HasMultipleOutputStreams());  // False only for a GS with multiple output streams.
+
+    if (bStoreOutputReg) {
+      for (unsigned c = 0; c < DXBC::kWidth; c++) {
+        if (!Mask.IsSet(c)) continue;
+
+        // Retrieve signature element. The patch-constant phase uses the patch constant signature and StorePatchConstant.
+        OP::OpCode OpCode;
+        const DxilSignatureElement *E;
+        if (!m_bPatchConstantPhase) {
+          E = m_pOutputSignature->GetElementWithStream(Reg, c, m_pPR->GetOutputStream());
+          OpCode = OP::OpCode::StoreOutput;
+        } else {
+          E = m_pPatchConstantSignature->GetElementWithStream(Reg, c, m_pPR->GetOutputStream());
+          OpCode = OP::OpCode::StorePatchConstant;
+        }
+        CompType DxbcValueType = E->GetCompType();
+        if (DxbcValueType.IsBoolTy()) {
+          DxbcValueType = CompType::getI32();
+        }
+        Type *pLlvmDxbcValueType = DxbcValueType.GetLLVMType(m_Ctx);
+
+        // Make row index relative within element.
+        Value *pRowIndexValueRel = m_pBuilder->CreateSub(pRowIndexValue, m_pOP->GetU32Const(E->GetStartRow()));
+
+        Value *Args[5];
+        Args[0] = m_pOP->GetU32Const((unsigned)OpCode);                 // OpCode
+        Args[1] = m_pOP->GetU32Const(E->GetID());                       // Output signature element ID
+        Args[2] = pRowIndexValueRel;                                    // Row, relative to the element
+        Args[3] = m_pOP->GetU8Const(c - E->GetStartCol());              // Col, relative to the element
+        Args[4] = MarkPrecise(CastDxbcValue(DstVal[c], ValueType, DxbcValueType), c); // Value
+        Function *F = m_pOP->GetOpFunc(OpCode, pLlvmDxbcValueType);
+        MarkPrecise(m_pBuilder->CreateCall(F, Args));
+      }
+    } else {
+      // In GS with multiple streams, output register file is shared among the streams.
+      // Store the values into additional temp registers, and later, store these at the emit points.
+      CompType DxbcValueType = DXBC::GetCompTypeFromMinPrec(O.m_MinPrecision, ValueType);
+      if (DxbcValueType.IsBoolTy()) {
+        DxbcValueType = CompType::getI32();
+      }
+      Type *pDxbcValueType = DxbcValueType.GetLLVMType(m_Ctx);
+
+      for (BYTE c = 0; c < DXBC::kWidth; c++) {
+        if (!Mask.IsSet(c)) continue;
+
+        Value *Args[3];
+        Args[0] = m_pOP->GetU32Const((unsigned)OP::OpCode::TempRegStore);             // OpCode
+        unsigned TempReg = GetGSTempRegForOutputReg(Reg);                             // Temp register standing in for output register Reg
+        Args[1] = m_pOP->GetU32Const(DXBC::GetRegIndex(TempReg, c));                  // Linearized register index
+        Args[2] = MarkPrecise(CastDxbcValue(DstVal[c], ValueType, DxbcValueType), c); // Value to store
+        Function *F = m_pOP->GetOpFunc(OP::OpCode::TempRegStore, pDxbcValueType);
+        MarkPrecise(m_pBuilder->CreateCall(F, Args));
+      }
+    }
+
+    break;
+  }
+
+  case D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH:
+  case D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL:
+  case D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL:
+  case D3D11_SB_OPERAND_TYPE_OUTPUT_STENCIL_REF:
+  case D3D10_SB_OPERAND_TYPE_OUTPUT_COVERAGE_MASK: {
+    DXASSERT_DXBC(O.m_IndexDimension == D3D10_SB_OPERAND_INDEX_0D);
+    for (unsigned c = 0; c < DXBC::kWidth; c++) {
+      if (!Mask.IsSet(c)) continue;
+
+      // Retrieve signature element. It is looked up by operand type, so the lookup does not depend on c.
+      DXASSERT(m_pSM->IsPS(), "PS has only one output stream.");
+      const DxilSignatureElement *E = m_pOutputSignature->GetElement(O.m_Type);
+      CompType DxbcValueType = E->GetCompType();
+      Type *pLlvmDxbcValueType = DxbcValueType.GetLLVMType(m_Ctx);
+
+      Value *Args[5];
+      Args[0] = m_pOP->GetU32Const((unsigned)OP::OpCode::StoreOutput);              // OpCode
+      Args[1] = m_pOP->GetU32Const(E->GetID());                                     // Output signature element ID
+      Args[2] = m_pOP->GetU32Const(0);                                              // Row, relative to the element
+      Args[3] = m_pOP->GetU8Const(c - E->GetStartCol());                            // Col, relative to the element
+      Args[4] = MarkPrecise(CastDxbcValue(DstVal[c], ValueType, DxbcValueType), c); // Value
+      Function *F = m_pOP->GetOpFunc(OP::OpCode::StoreOutput, pLlvmDxbcValueType);
+      MarkPrecise(m_pBuilder->CreateCall(F, Args));
+    }
+
+    break;
+  }
+
+  case D3D10_SB_OPERAND_TYPE_NULL:  // Null destination: no store is emitted.
+    break;
+
+  default:
+    DXASSERT_ARGS(false, "Operand type %u is not yet implemented", O.m_Type);
+  }
+}
+
+// Produces the value of one operand index according to its representation:
+//   - IMMEDIATE32:               a 32-bit constant holding m_RegIndex;
+//   - RELATIVE:                  the value of the relative register;
+//   - IMMEDIATE32_PLUS_RELATIVE: the relative value plus m_RegIndex, with the
+//                                add omitted when m_RegIndex is zero.
+// Every other representation trips DXASSERT_DXBC and yields nullptr.
+Value *DxbcConverter::LoadOperandIndex(const D3D10ShaderBinary::COperandIndex &OpIndex,
+                                       const D3D10_SB_OPERAND_INDEX_REPRESENTATION IndexType) {
+  switch (IndexType) {
+  case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
+    DXASSERT_DXBC(OpIndex.m_RelRegType == D3D10_SB_OPERAND_TYPE_IMMEDIATE32);
+    return m_pOP->GetU32Const(OpIndex.m_RegIndex);
+
+  case D3D10_SB_OPERAND_INDEX_RELATIVE:
+    return LoadOperandIndexRelative(OpIndex);
+
+  case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: {
+    Value *pRelative = LoadOperandIndexRelative(OpIndex);
+    const unsigned ImmOffset = OpIndex.m_RegIndex;
+    if (ImmOffset == 0)
+      return pRelative;
+    return m_pBuilder->CreateAdd(pRelative, m_pOP->GetU32Const(ImmOffset));
+  }
+
+  // 64-bit immediates and unknown representations are not handled.
+  case D3D10_SB_OPERAND_INDEX_IMMEDIATE64:
+  case D3D10_SB_OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE:
+  default:
+    DXASSERT_DXBC(false);
+    return nullptr;
+  }
+}
+
+// Loads the register component that a relative operand index refers to.
+//   - TEMP:           emits a TempRegLoad (i32 overload) of the linearized
+//                     register index.
+//   - INDEXABLE_TEMP: loads the element from the register's pValue32 storage
+//                     and casts it from F32 to I32 via CastDxbcValue.
+// Any other relative register type trips DXASSERT_DXBC and yields nullptr.
+Value *DxbcConverter::LoadOperandIndexRelative(const D3D10ShaderBinary::COperandIndex &OpIndex) {
+  switch (OpIndex.m_RelRegType) {
+  case D3D10_SB_OPERAND_TYPE_TEMP: {
+    const unsigned TempReg = OpIndex.m_RelIndex;
+    const unsigned TempComp = OpIndex.m_ComponentName;
+
+    Value *CallArgs[] = {
+      m_pOP->GetU32Const((unsigned)OP::OpCode::TempRegLoad),      // OpCode
+      m_pOP->GetU32Const(DXBC::GetRegIndex(TempReg, TempComp)),   // Linearized register index
+    };
+    Function *pLoadFunc = m_pOP->GetOpFunc(OP::OpCode::TempRegLoad, Type::getInt32Ty(m_Ctx));
+    return m_pBuilder->CreateCall(pLoadFunc, CallArgs);
+  }
+
+  case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP: {
+    const unsigned XReg = OpIndex.m_RelIndex;
+    IndexableReg &XRegRec = m_IndexableRegs[XReg];
+
+    // Flattened element position: row * components-per-row + component.
+    const unsigned ElemIdx = OpIndex.m_RelIndex1*XRegRec.NumComps + OpIndex.m_ComponentName;
+    Value *pGEPIndices[2] = { m_pOP->GetU32Const(0), m_pOP->GetU32Const(ElemIdx) };
+    Value *pElemPtr = m_pBuilder->CreateGEP(XRegRec.pValue32, pGEPIndices);
+
+    Value *pValue = m_pBuilder->CreateAlignedLoad(pElemPtr, kRegCompAlignment);
+    DXASSERT(pValue->getType()->isFloatTy(), "otherwise broke the assumption that alloca locations are floats");
+    return CastDxbcValue(pValue, CompType::getF32(), CompType::getI32());
+  }
+
+  default:
+    DXASSERT_DXBC(false);
+    return nullptr;
+  }
+}
+
+Value *DxbcConverter::CastDxbcValue(Value *pValue, const CompType &SrcType, const CompType &DstType) {
+  if (SrcType == DstType)  // Identical types: nothing to emit.
+    return pValue;
+  // Otherwise emit the conversion selected by the (SrcType, DstType) pair; unhandled pairs reach the final assert and return nullptr.
+  DXASSERT(SrcType.GetLLVMType(m_Ctx) == pValue->getType(), "otherwise caller passed incorrect args");
+  // Integer<->float "mov" cases below reinterpret bits (sign/zero-extend or truncate plus bitcast); they are not numeric conversions.
+  switch (SrcType.GetKind()) {
+  case CompType::Kind::I1:  // i1 is sign-extended, so true becomes an all-ones bit pattern.
+    switch (DstType.GetKind()) {
+    case CompType::Kind::I1:
+      return pValue;
+    case CompType::Kind::I16:
+    case CompType::Kind::U16:
+      return m_pBuilder->CreateSExt(pValue, Type::getInt16Ty(m_Ctx));
+    case CompType::Kind::I32:
+    case CompType::Kind::U32:
+      return m_pBuilder->CreateSExt(pValue, Type::getInt32Ty(m_Ctx));
+    case CompType::Kind::F16:  // Uses the IR builder's bitcast, not the member CreateBitCast helper.
+      return m_pBuilder->CreateBitCast(m_pBuilder->CreateSExt(pValue, Type::getInt16Ty(m_Ctx)), Type::getHalfTy(m_Ctx));
+    case CompType::Kind::F32:
+      return m_pBuilder->CreateBitCast(m_pBuilder->CreateSExt(pValue, Type::getInt32Ty(m_Ctx)), Type::getFloatTy(m_Ctx));
+    default: __fallthrough;  // Unhandled destination: leave the inner switch and reach the assert at the end.
+    }
+    break;
+
+  case CompType::Kind::I16:  // Signed 16-bit source: widening uses sign extension.
+    switch (DstType.GetKind()) {
+    case CompType::Kind::I1:
+      return m_pBuilder->CreateICmpNE(pValue, m_pOP->GetI16Const(0));
+    case CompType::Kind::U16:
+      DXASSERT_DXBC(false);  // Asserts first, then returns the value unchanged.
+      return pValue;
+    case CompType::Kind::I32:
+    case CompType::Kind::U32:
+      return m_pBuilder->CreateSExt(pValue, Type::getInt32Ty(m_Ctx));
+    case CompType::Kind::F16: {
+      DXASSERT_DXBC(false);  // Asserts first; the code below still widens to i32, bitcasts to f32 and truncates to half.
+      pValue = m_pBuilder->CreateSExt(pValue, Type::getInt32Ty(m_Ctx));
+      pValue = CreateBitCast(pValue, CompType::getI32(), CompType::getF32());
+      return m_pBuilder->CreateFPTrunc(pValue, Type::getHalfTy(m_Ctx));
+    }
+    case CompType::Kind::F32: { // mov
+      pValue = m_pBuilder->CreateSExt(pValue, Type::getInt32Ty(m_Ctx));
+      return CreateBitCast(pValue, CompType::getI32(), CompType::getF32());
+    }
+    default: __fallthrough;
+    }
+    break;
+
+  case CompType::Kind::U16:  // Unsigned 16-bit source: same shape as I16, but widening uses zero extension.
+    switch (DstType.GetKind()) {
+    case CompType::Kind::I1:
+      return m_pBuilder->CreateICmpNE(pValue, m_pOP->GetU16Const(0));
+    case CompType::Kind::I16:
+      DXASSERT_DXBC(false);
+      return pValue;
+    case CompType::Kind::I32:
+    case CompType::Kind::U32:
+      return m_pBuilder->CreateZExt(pValue, Type::getInt32Ty(m_Ctx));
+    case CompType::Kind::F16: {
+      DXASSERT_DXBC(false);
+      pValue = m_pBuilder->CreateZExt(pValue, Type::getInt32Ty(m_Ctx));
+      pValue = CreateBitCast(pValue, CompType::getI32(), CompType::getF32());
+      return m_pBuilder->CreateFPTrunc(pValue, Type::getHalfTy(m_Ctx));
+    }
+    case CompType::Kind::F32: { // mov
+      pValue = m_pBuilder->CreateZExt(pValue, Type::getInt32Ty(m_Ctx));
+      return CreateBitCast(pValue, CompType::getI32(), CompType::getF32());
+    }
+    default: __fallthrough;
+    }
+    break;
+
+  case CompType::Kind::I32:  // I32 and U32 sources share one conversion table.
+  case CompType::Kind::U32:
+    switch (DstType.GetKind()) {
+    case CompType::Kind::I1:
+      return m_pBuilder->CreateICmpNE(pValue, m_pOP->GetI32Const(0));
+    case CompType::Kind::I16:
+    case CompType::Kind::U16:
+      return m_pBuilder->CreateTrunc(pValue, Type::getInt16Ty(m_Ctx));
+    case CompType::Kind::I32:
+    case CompType::Kind::U32:
+      return pValue;  // I32 <-> U32: returned unchanged.
+    case CompType::Kind::F16: {
+      DXASSERT_DXBC(false);
+      pValue = CreateBitCast(pValue, CompType::getI32(), CompType::getF32());
+      return m_pBuilder->CreateFPTrunc(pValue, Type::getHalfTy(m_Ctx));
+    }
+    case CompType::Kind::F32:
+      return CreateBitCast(pValue, CompType::getI32(), CompType::getF32());
+    default: __fallthrough;
+    }
+    break;
+
+  case CompType::Kind::F16:  // Half source: extended to float before any integer reinterpretation; no I1 destination is handled.
+    switch (DstType.GetKind()) {
+    case CompType::Kind::I16:
+    case CompType::Kind::U16: {
+      DXASSERT_DXBC(false);
+      pValue = m_pBuilder->CreateFPExt(pValue, Type::getFloatTy(m_Ctx));
+      pValue = CreateBitCast(pValue, CompType::getF32(), CompType::getI32());
+      return m_pBuilder->CreateTrunc(pValue, Type::getInt16Ty(m_Ctx));
+    }
+    case CompType::Kind::I32:
+    case CompType::Kind::U32: { // mov
+      pValue = m_pBuilder->CreateFPExt(pValue, Type::getFloatTy(m_Ctx));
+      return CreateBitCast(pValue, CompType::getF32(), CompType::getI32());
+    }
+    case CompType::Kind::F32:
+      return m_pBuilder->CreateFPExt(pValue, Type::getFloatTy(m_Ctx));
+    default: __fallthrough;
+    }
+    break;
+
+  case CompType::Kind::F32:
+    switch (DstType.GetKind()) {
+    case CompType::Kind::I1: {  // Compares the float's bit pattern, viewed as i32, against zero.
+      pValue = CreateBitCast(pValue, CompType::getF32(), CompType::getI32());
+      return m_pBuilder->CreateICmpNE(pValue, m_pOP->GetI32Const(0));
+    }
+    case CompType::Kind::I16:
+    case CompType::Kind::U16: { // min-prec for TGSM load.
+      pValue = CreateBitCast(pValue, CompType::getF32(), CompType::getI32());
+      return m_pBuilder->CreateTrunc(pValue, Type::getInt16Ty(m_Ctx));
+    }
+    case CompType::Kind::I32:
+    case CompType::Kind::U32:
+      return CreateBitCast(pValue, CompType::getF32(), CompType::getI32());
+    case CompType::Kind::F16:
+      return m_pBuilder->CreateFPTrunc(pValue, Type::getHalfTy(m_Ctx));
+    default: __fallthrough;
+    }
+    break;
+
+  default: __fallthrough;  // Source kinds not listed above have no cast here.
+  }
+
+  DXASSERT(false, "unsupported cast combination");
+  return nullptr;
+}
+
+// Emits a call to the DXIL bitcast operation that reinterprets pValue from
+// SrcType to DstType. Supported pairs are the same-width integer/float
+// combinations: I16<->F16, I32<->F32 and I64<->F64.
+//   pValue  - value to reinterpret; its LLVM type must match SrcType.
+//   SrcType - component type pValue currently has.
+//   DstType - component type the bits are reinterpreted as.
+// Returns the created call.
+// Throws E_FAIL when the (SrcType, DstType) pair has no bitcast opcode, rather
+// than handing an invalid opcode to the DXIL op table.
+Value *DxbcConverter::CreateBitCast(Value *pValue, const CompType &SrcType, const CompType &DstType) {
+  DXASSERT(SrcType.GetLLVMType(m_Ctx) == pValue->getType(), "otherwise caller passed incorrect args");
+
+  const OP::OpCode kInvalidOpCode = (OP::OpCode)(-1);
+  OP::OpCode OpCode = kInvalidOpCode;
+
+  switch (SrcType.GetKind()) {
+  case CompType::Kind::I16:
+    switch (DstType.GetKind()) {
+    case CompType::Kind::F16:   OpCode = OP::OpCode::BitcastI16toF16; break;
+    default: break;
+    }
+    break;
+
+  case CompType::Kind::I32:
+    switch (DstType.GetKind()) {
+    case CompType::Kind::F32:   OpCode = OP::OpCode::BitcastI32toF32; break;
+    default: break;
+    }
+    break;
+
+  case CompType::Kind::I64:
+    switch (DstType.GetKind()) {
+    case CompType::Kind::F64:   OpCode = OP::OpCode::BitcastI64toF64; break;
+    default: break;
+    }
+    break;
+
+  case CompType::Kind::F16:
+    switch (DstType.GetKind()) {
+    case CompType::Kind::I16:   OpCode = OP::OpCode::BitcastF16toI16; break;
+    default: break;
+    }
+    break;
+
+  case CompType::Kind::F32:
+    switch (DstType.GetKind()) {
+    case CompType::Kind::I32:   OpCode = OP::OpCode::BitcastF32toI32; break;
+    default: break;
+    }
+    break;
+
+  case CompType::Kind::F64:
+    switch (DstType.GetKind()) {
+    case CompType::Kind::I64:   OpCode = OP::OpCode::BitcastF64toI64; break;
+    default: break;
+    }
+    break;
+
+  default:
+    break;
+  }
+
+  // No opcode was selected: the pair is unsupported. Fail explicitly instead of
+  // using the sentinel value as an opcode below.
+  DXASSERT(OpCode != kInvalidOpCode, "unsupported bitcast combination");
+  IFTBOOL(OpCode != kInvalidOpCode, E_FAIL);
+
+  Value *Args[2];
+  Args[0] = m_pOP->GetU32Const((unsigned)OpCode);     // OpCode
+  Args[1] = pValue;                                   // Input
+
+  // The bitcast op functions are requested with the void type as the
+  // overload-type argument.
+  Function *F = m_pOP->GetOpFunc(OpCode, Type::getVoidTy(m_Ctx));
+
+  return m_pBuilder->CreateCall(F, Args);
+}
+
+// Applies the source-operand modifiers stored in O.m_Modifier to pValue.
+// The ABS modifier is applied first (as a call to the FAbs DXIL op), then the
+// NEG modifier (FNeg for floating-point values, integer Neg otherwise).
+// Returns the modified value, or pValue unchanged when no modifier is set.
+Value *DxbcConverter::ApplyOperandModifiers(Value *pValue, const D3D10ShaderBinary::COperandBase &O) {
+  const bool bHasAbs = (O.m_Modifier & D3D10_SB_OPERAND_MODIFIER_ABS) != 0;
+  const bool bHasNeg = (O.m_Modifier & D3D10_SB_OPERAND_MODIFIER_NEG) != 0;
+
+  if (bHasAbs) {
+    // Absolute value is only expected on floating-point operands.
+    DXASSERT_DXBC(pValue->getType()->isFloatingPointTy());
+    Function *pAbsFunc = m_pOP->GetOpFunc(OP::OpCode::FAbs, pValue->getType());
+    Value *AbsArgs[2] = {
+      m_pOP->GetU32Const((unsigned)OP::OpCode::FAbs),
+      pValue
+    };
+    pValue = m_pBuilder->CreateCall(pAbsFunc, AbsArgs);
+  }
+
+  if (!bHasNeg)
+    return pValue;
+
+  if (pValue->getType()->isFloatingPointTy())
+    return m_pBuilder->CreateFNeg(pValue);
+
+  DXASSERT_DXBC(pValue->getType()->isIntegerTy());
+  return m_pBuilder->CreateNeg(pValue);
+}
+
+// Applies instruction-level result modifiers to DstVal. Only saturation is
+// handled: when Inst.m_bSaturate is set, each non-null component of DstVal is
+// replaced by a call to the Saturate DXIL op. Components that hold the same
+// Value share a single Saturate call.
+void DxbcConverter::ApplyInstructionModifiers(OperandValue &DstVal,
+                                              const D3D10ShaderBinary::CInstruction &Inst) {
+  if (!Inst.m_bSaturate)
+    return;
+
+  // Maps an unsaturated input value to the Saturate call already created for it.
+  map<Value *, Value *> SaturatedByInput;
+
+  for (BYTE c = 0; c < DXBC::kWidth; c++) {
+    Value *pIn = DstVal[c];
+    if (pIn == nullptr)
+      continue;
+
+    auto Cached = SaturatedByInput.find(pIn);
+    if (Cached == SaturatedByInput.end()) {
+      Value *SatArgs[2];
+      SatArgs[0] = m_pOP->GetU32Const((unsigned)OP::OpCode::Saturate); // OpCode
+      SatArgs[1] = pIn;                                                // Value
+      Function *pSatFunc = m_pOP->GetOpFunc(OP::OpCode::Saturate, pIn->getType());
+      Value *pSat = MarkPrecise(m_pBuilder->CreateCall(pSatFunc, SatArgs), c);
+      DstVal[c] = pSat;
+      SaturatedByInput[pIn] = pSat;
+    } else {
+      DstVal[c] = Cached->second;
+    }
+
+    // A double value advances the component index by one extra slot.
+    if (pIn->getType() == Type::getDoubleTy(m_Ctx))
+      c++;
+  }
+}
+
+// Infers the component type of operand OpIdx of Inst from the shader
+// signatures, falling back to the operand's min-precision annotation.
+//   Inst  - instruction that owns the operand.
+//   OpIdx - index into Inst.m_Operands.
+//   Mask  - components of interest; for input/output operands the first set
+//           component decides the result.
+// Returns:
+//   - for input/output operands, the component type of the matching signature
+//     element (for geometry-shader outputs, the type shared by the elements of
+//     all output streams, or an invalid type if the streams disagree);
+//   - otherwise the type derived from a non-default min-precision;
+//   - otherwise CompType::getInvalid().
+CompType DxbcConverter::InferOperandType(const D3D10ShaderBinary::CInstruction &Inst,
+                                         const unsigned OpIdx,
+                                         const CMask &Mask) {
+  const D3D10ShaderBinary::COperandBase &O = Inst.m_Operands[OpIdx];
+
+  for (BYTE c = 0; c < DXBC::kWidth; c++) {
+    if (!Mask.IsSet(c)) continue;
+
+    switch (O.m_Type) {
+    case D3D10_SB_OPERAND_TYPE_INPUT: {
+      // GS/HS read the register from index dimension 1, other stages from
+      // dimension 0 (presumably dimension 0 is the vertex/control point there).
+      unsigned Reg = O.m_Index[(m_pSM->IsGS() || m_pSM->IsHS()) ? 1 : 0].m_RegIndex;
+      unsigned Comp = O.m_ComponentName;
+      if (O.m_ComponentSelection == D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE)
+        Comp = O.m_Swizzle[c];
+      // NOTE(review): GetElement returns nullptr for an undeclared
+      // register/component and the result is dereferenced unchecked here and
+      // in the non-GS output path below; confirm callers guarantee a declared
+      // element.
+      const DxilSignatureElement *E = m_pInputSignature->GetElement(Reg, Comp);
+      return E->GetCompType();
+    }
+
+    case D3D10_SB_OPERAND_TYPE_OUTPUT: {
+      unsigned Reg = O.m_Index[0].m_RegIndex;
+
+      if (!m_pSM->IsGS()) {
+        if (!m_bPatchConstantPhase) {
+          const DxilSignatureElement *E = m_pOutputSignature->GetElement(Reg, c);
+          return E->GetCompType();
+        } else {
+          const DxilSignatureElement *E = m_pPatchConstantSignature->GetElement(Reg, c);
+          return E->GetCompType();
+        }
+      } else {
+        // The same output register may be declared in several streams; the
+        // type is only usable if every stream that declares it agrees.
+        CompType CT;
+        bool bCTInitialized = false;
+        for (unsigned Stream = 0; Stream < DXIL::kNumOutputStreams; Stream++) {
+          // Query the element of *this* stream; GetElement(Reg, c) would look
+          // at stream 0 on every iteration.
+          const DxilSignatureElement *E = m_pOutputSignature->GetElementWithStream(Reg, c, Stream);
+          if (E == nullptr)
+            continue;
+
+          if (!bCTInitialized) {
+            bCTInitialized = true;
+            CT = E->GetCompType();
+          } else {
+            if (CT.GetKind() != E->GetCompType().GetKind())
+              return CompType::getInvalid();
+          }
+        }
+
+        return CT;
+      }
+    }
+
+    default: __fallthrough;
+    }
+  }
+
+  // Not a signature-backed operand: fall back to the min-precision hint.
+  if (O.m_MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) {
+    return DXBC::GetCompTypeFromMinPrec(O.m_MinPrecision, CompType::getInvalid());
+  }
+
+  return CompType::getInvalid();
+}
+
+// Verifies that the string starting at pStr has a NUL terminator at or before
+// pMaxPtrInclusive. Throws DXC_E_INCORRECT_DXBC if the scan would have to read
+// past pMaxPtrInclusive.
+void DxbcConverter::CheckDxbcString(const char *pStr, const void *pMaxPtrInclusive) {
+  const char *pCur = pStr;
+  while (true) {
+    // Bounds check comes before the read of *pCur.
+    IFTBOOL(!(pCur > pMaxPtrInclusive), DXC_E_INCORRECT_DXBC);
+    if (*pCur == '\0')
+      return;
+    ++pCur;
+  }
+}
+
+// Post-conversion step: checks that the module's control flow is reducible
+// and, when m_bRunDxilCleanup is set, runs the DXIL cleanup pass over the
+// module. In DXBCCONV_DBG builds the module is also run through the LLVM
+// verifier before and after.
+void DxbcConverter::Optimize() {
+  class PassManager CleanupPM;
+
+#if DXBCCONV_DBG
+  // verifyModule() returns true when the module is broken.
+  const bool bBrokenBefore = verifyModule(*m_pModule);
+  IFTBOOL(!bBrokenBefore, DXC_E_IR_VERIFICATION_FAILED);
+#endif
+
+  // The CFG must be reducible.
+  const bool bReducible = IsReducible(*m_pModule, IrreducibilityAction::ThrowException);
+  IFTBOOL(bReducible, DXC_E_IRREDUCIBLE_CFG);
+
+  if (m_bRunDxilCleanup) {
+    CleanupPM.add(createDxilCleanupPass());
+    CleanupPM.run(*m_pModule);
+  }
+
+#if DXBCCONV_DBG
+  const bool bBrokenAfter = verifyModule(*m_pModule);
+  IFTBOOL(!bBrokenAfter, DXC_E_IR_VERIFICATION_FAILED);
+#endif
+}
+
+// Populates PassManager with the module passes that PassManagerBuilder selects
+// for the given optimization level; the size level is fixed at 0.
+void DxbcConverter::AddOptimizationPasses(PassManagerBase &PassManager, unsigned OptLevel) {
+  PassManagerBuilder PMB;
+  PMB.SizeLevel = 0;
+  PMB.OptLevel = OptLevel;
+  PMB.populateModulePassManager(PassManager);
+}
+
+// Emits an unconditional branch to pTargetBB through m_pBuilder, unless pBB
+// already ends in a branch or return instruction. An empty pBB always gets the
+// branch.
+void DxbcConverter::CreateBranchIfNeeded(BasicBlock *pBB, BasicBlock *pTargetBB) {
+  if (!pBB->empty()) {
+    Instruction *pTail = &pBB->getInstList().back();
+    switch (pTail->getOpcode()) {
+    case Instruction::Br:
+    case Instruction::Ret:
+      // Block is already terminated; nothing to add.
+      return;
+    default:
+      DXASSERT(!pTail->isTerminator(), "otherwise broke possible assumptions of control flow");
+      break;
+    }
+  }
+
+  m_pBuilder->CreateBr(pTargetBB);
+}
+
+// Loads the condition operand of a test-zero / test-nonzero instruction and
+// turns it into a boolean compare result.
+//   Inst  - instruction carrying the test kind (m_Test) and the operands.
+//   OpIdx - index of the condition operand in Inst.m_Operands.
+// Returns (operand != 0) for D3D10_SB_INSTRUCTION_TEST_NONZERO and
+// (operand == 0) otherwise, with the operand loaded as a 32-bit integer.
+Value *DxbcConverter::LoadZNZCondition(D3D10ShaderBinary::CInstruction &Inst,
+                                       const unsigned OpIdx) {
+  D3D10ShaderBinary::COperandBase &O = Inst.m_Operands[OpIdx];
+  D3D10_SB_INSTRUCTION_TEST_BOOLEAN TestType = Inst.m_Test;
+  // Only the single component selected by the operand is read.
+  BYTE Comp = (BYTE)O.m_ComponentName;
+  CMask ReadMask = CMask::MakeCompMask(Comp);
+  OperandValue In1;
+  // Load the operand that OpIdx designates; the component and mask above were
+  // derived from that same operand, so a hard-coded index 0 would only be
+  // correct when OpIdx == 0.
+  LoadOperand(In1, Inst, OpIdx, ReadMask, CompType::getI32());
+
+  Value *pCond = In1[Comp];
+  if (TestType == D3D10_SB_INSTRUCTION_TEST_NONZERO) {
+    pCond = m_pBuilder->CreateICmpNE(pCond, m_pOP->GetI32Const(0));
+  } else {
+    pCond = m_pBuilder->CreateICmpEQ(pCond, m_pOP->GetI32Const(0));
+  }
+
+  return pCond;
+}
+
+// Combines two min-precision annotations. FLOAT_2_8 is treated as FLOAT_16;
+// if the two (normalized) precisions are equal that precision is returned,
+// otherwise the result is D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT.
+D3D11_SB_OPERAND_MIN_PRECISION DxbcConverter::GetHigherPrecision(
+                                    D3D11_SB_OPERAND_MIN_PRECISION p1,
+                                    D3D11_SB_OPERAND_MIN_PRECISION p2) {
+  auto Normalize = [](D3D11_SB_OPERAND_MIN_PRECISION p) -> D3D11_SB_OPERAND_MIN_PRECISION {
+    return (p == D3D11_SB_OPERAND_MIN_PRECISION_FLOAT_2_8)
+               ? D3D11_SB_OPERAND_MIN_PRECISION_FLOAT_16
+               : p;
+  };
+
+  const D3D11_SB_OPERAND_MIN_PRECISION n1 = Normalize(p1);
+  const D3D11_SB_OPERAND_MIN_PRECISION n2 = Normalize(p2);
+
+  return (n1 == n2) ? n1 : D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT;
+}
+
+// Returns the temp-register index used for geometry-shader output register
+// OutputReg: the output index offset by m_NumTempRegs.
+unsigned DxbcConverter::GetGSTempRegForOutputReg(unsigned OutputReg) const {
+  const unsigned FirstOutputTempReg = m_NumTempRegs;
+  return FirstOutputTempReg + OutputReg;
+}
+
+
+//------------------------------------------------------------------------------
+//
+//  DxbcConverter::ScopeStack methods.
+//
+// Creates an empty scope stack with every per-kind name counter at zero.
+DxbcConverter::ScopeStack::ScopeStack()
+    : m_FuncCount(0),
+      m_IfCount(0),
+      m_LoopCount(0),
+      m_SwitchCount(0),
+      m_HullLoopCount(0) {}
+
+// Returns the innermost (most recently pushed) scope.
+// Throws E_FAIL if the stack is empty.
+DxbcConverter::Scope &DxbcConverter::ScopeStack::Top() {
+  const bool bHasScope = !m_Scopes.empty();
+  IFTBOOL(bHasScope, E_FAIL);
+  return m_Scopes[m_Scopes.size() - 1];
+}
+
+// Pushes a new scope of the given kind and returns a reference to it.
+//   Kind        - scope kind; must be below Scope::LastKind.
+//   pPreScopeBB - basic block recorded as the scope's pre-scope block.
+// The scope's NameIndex is taken from the per-kind counter, which is then
+// incremented.
+DxbcConverter::Scope &DxbcConverter::ScopeStack::Push(enum Scope::Kind Kind, BasicBlock *pPreScopeBB) {
+  DXASSERT(Kind < Scope::LastKind, "otherwise the caller passed incorrect scope kind value");
+
+  // Select the counter that numbers scopes of this kind.
+  unsigned *pKindCounter = nullptr;
+  switch (Kind) {
+  case Scope::Function: pKindCounter = &m_FuncCount;     break;
+  case Scope::If:       pKindCounter = &m_IfCount;       break;
+  case Scope::Loop:     pKindCounter = &m_LoopCount;     break;
+  case Scope::Switch:   pKindCounter = &m_SwitchCount;   break;
+  case Scope::HullLoop: pKindCounter = &m_HullLoopCount; break;
+  default:                                               break;
+  }
+
+  Scope NewScope;
+  NewScope.Kind = Kind;
+  NewScope.pPreScopeBB = pPreScopeBB;
+  if (pKindCounter != nullptr)
+    NewScope.NameIndex = (*pKindCounter)++;
+
+  m_Scopes.push_back(NewScope);
+  return Top();
+}
+
+// Removes the innermost scope.
+// Throws E_FAIL if the stack is empty (same contract as Top()); calling
+// pop_back() on an empty vector would be undefined behavior.
+void DxbcConverter::ScopeStack::Pop() {
+  IFTBOOL(!m_Scopes.empty(), E_FAIL);
+  m_Scopes.pop_back();
+}
+
+// Returns true when no scope is currently open.
+bool DxbcConverter::ScopeStack::IsEmpty() const {
+  return m_Scopes.size() == 0;
+}
+
+// Returns the innermost enclosing scope of kind Loop, searching from the top
+// of the stack downwards. Asserts and throws E_FAIL if there is none.
+DxbcConverter::Scope &DxbcConverter::ScopeStack::FindParentLoop() {
+  for (size_t i = m_Scopes.size(); i > 0; --i) {
+    Scope &Candidate = m_Scopes[i - 1];
+    if (Candidate.Kind == Scope::Loop)
+      return Candidate;
+  }
+
+  DXASSERT(false, "otherwise was not able to find the parent enclosing scope");
+  IFTBOOL(false, E_FAIL);
+  return Top();
+}
+
+// Returns the innermost enclosing scope of kind Loop or Switch, searching from
+// the top of the stack downwards. Asserts and throws E_FAIL if there is none.
+DxbcConverter::Scope &DxbcConverter::ScopeStack::FindParentLoopOrSwitch() {
+  for (size_t i = m_Scopes.size(); i > 0; --i) {
+    Scope &Candidate = m_Scopes[i - 1];
+    const bool bIsLoop = (Candidate.Kind == Scope::Loop);
+    if (bIsLoop || Candidate.Kind == Scope::Switch)
+      return Candidate;
+  }
+
+  DXASSERT(false, "otherwise was not able to find the parent enclosing scope");
+  IFTBOOL(false, E_FAIL);
+  return Top();
+}
+
+// Returns the innermost enclosing scope of kind Function, searching from the
+// top of the stack downwards. Asserts and throws E_FAIL if there is none.
+DxbcConverter::Scope &DxbcConverter::ScopeStack::FindParentFunction() {
+  for (size_t i = m_Scopes.size(); i > 0; --i) {
+    Scope &Candidate = m_Scopes[i - 1];
+    if (Candidate.Kind == Scope::Function)
+      return Candidate;
+  }
+
+  DXASSERT(false, "otherwise was not able to find the parent enclosing scope");
+  IFTBOOL(false, E_FAIL);
+  return Top();
+}
+
+// Returns the innermost enclosing scope of kind HullLoop, searching from the
+// top of the stack downwards. Asserts and throws E_FAIL if there is none.
+DxbcConverter::Scope &DxbcConverter::ScopeStack::FindParentHullLoop() {
+  for (size_t i = m_Scopes.size(); i > 0; --i) {
+    Scope &Candidate = m_Scopes[i - 1];
+    if (Candidate.Kind == Scope::HullLoop)
+      return Candidate;
+  }
+
+  DXASSERT(false, "otherwise was not able to find the parent enclosing scope");
+  IFTBOOL(false, E_FAIL);
+  return Top();
+}
+
+// Builds a resource global-variable name by appending the decimal ID to
+// pNamePrefix.
+string DxbcConverter::SynthesizeResGVName(const char *pNamePrefix, unsigned ID) {
+  string Name;
+  raw_string_ostream NameOS(Name);
+  NameOS << pNamePrefix << ID;
+  // Push the buffered text into Name before returning it.
+  NameOS.flush();
+  return Name;
+}
+
+// Returns the struct type named "dx.types.i8x<StructSizeInBytes>", creating it
+// as a struct wrapping an array of StructSizeInBytes i8 elements if the module
+// does not have a type with that name yet.
+StructType *DxbcConverter::GetStructResElemType(unsigned StructSizeInBytes) {
+  string TypeName;
+  raw_string_ostream TypeNameOS(TypeName);
+  TypeNameOS << "dx.types.i8x" << StructSizeInBytes;
+  TypeNameOS.flush();
+
+  // Reuse the type if it was already created.
+  if (StructType *pExisting = m_pModule->getTypeByName(TypeName))
+    return pExisting;
+
+  return StructType::create(m_Ctx, ArrayType::get(Type::getInt8Ty(m_Ctx), StructSizeInBytes), TypeName);
+}
+
+// Returns the element struct type for a typed resource of component type CT.
+// A module type named "dx.types.<CT name>" is reused when present. Otherwise
+// the element type is looked up (SNorm/UNorm float types come from the DXIL
+// type system, everything else from CT itself); if that element type is
+// already a struct it is returned as-is, else it is wrapped in a new struct
+// with the synthesized name.
+StructType *DxbcConverter::GetTypedResElemType(CompType CT) {
+  string TypeName;
+  raw_string_ostream TypeNameOS(TypeName);
+  TypeNameOS << "dx.types." << CT.GetName();
+  TypeNameOS.flush();
+
+  if (StructType *pExisting = m_pModule->getTypeByName(TypeName))
+    return pExisting;
+
+  Type *pElemType = nullptr;
+  switch (CT.GetKind()) {
+  case CompType::Kind::SNormF32:
+    pElemType = m_pPR->GetTypeSystem().GetSNormF32Type(1);
+    break;
+  case CompType::Kind::UNormF32:
+    pElemType = m_pPR->GetTypeSystem().GetUNormF32Type(1);
+    break;
+  default:
+    pElemType = CT.GetLLVMType(m_Ctx);
+    break;
+  }
+
+  if (pElemType->isStructTy())
+    return dyn_cast<StructType>(pElemType);
+
+  return StructType::create(m_Ctx, pElemType, TypeName);
+}
+
+// Returns an undef value whose type is "pointer to pType" in address space
+// AddrSpace.
+UndefValue *DxbcConverter::DeclareUndefPtr(Type *pType, unsigned AddrSpace) {
+  return UndefValue::get(PointerType::get(pType, AddrSpace));
+}
+
+// Marks pVal as precise when the current precise mask asks for it, and returns
+// pVal unchanged.
+//   Comp - component to test against m_PreciseMask; BYTE(-1) means "precise if
+//          any component is precise".
+// Only instructions are affected. Floating-point math operators that are not
+// calls get their fast-math flags overwritten with default-constructed
+// FastMathFlags; every other instruction gets the DXIL precise metadata.
+Value *DxbcConverter::MarkPrecise(Value *pVal, BYTE Comp) {
+  const bool bAnyComponent = (Comp == BYTE(-1));
+  const bool bWantPrecise = bAnyComponent ? !m_PreciseMask.IsZero()
+                                          : m_PreciseMask.IsSet(Comp);
+  if (!bWantPrecise)
+    return pVal;
+
+  Instruction *pInst = dyn_cast<Instruction>(pVal);
+  if (pInst == nullptr)
+    return pVal;
+
+  const bool bNonCallFPMath = isa<FPMathOperator>(pInst) && !isa<CallInst>(pInst);
+  if (bNonCallFPMath) {
+    FastMathFlags DefaultFlags;
+    pInst->copyFastMathFlags(DefaultFlags);
+  } else {
+    MDNode *pPreciseMD = MDNode::get(m_Ctx, ConstantAsMetadata::get(m_pOP->GetI32Const(1)));
+    pInst->setMetadata(DxilMDHelper::kDxilPreciseAttributeMDName, pPreciseMD);
+  }
+
+  return pVal;
+}
+
+// Serializes m_pModule into DxilBitcode as a DxilProgramHeader followed by the
+// LLVM bitcode, then zero-pads the buffer to a multiple of 16 bytes.
+//   DxilBitcode - output buffer; written through a raw_svector_ostream.
+// Throws DXC_E_DATA_TOO_LARGE if the final buffer is not smaller than UINT_MAX
+// bytes or is not 16-byte aligned.
+void DxbcConverter::SerializeDxil(SmallVectorImpl<char> &DxilBitcode) {
+  raw_svector_ostream DxilStream(DxilBitcode);
+  // a. Reserve header.
+  // A zeroed placeholder is written first; it is filled in at step c.
+  DxilProgramHeader Header = { 0 };
+  DxilStream.write((char*)&Header, sizeof(Header));
+  // b. Bitcode.
+  WriteBitcodeToFile(m_pModule.get(), DxilStream);
+  DxilStream.flush();
+  // c. Fix header.
+  // The bitcode size excludes the header and is computed before padding.
+  uint32_t bitcodeSize = (uint32_t)DxilBitcode.size_in_bytes() - sizeof(DxilProgramHeader);
+  // The header pointer is taken from the buffer only after all stream writes.
+  DxilProgramHeader *pHeader = (DxilProgramHeader*)DxilBitcode.data();
+  InitProgramHeader(*pHeader, EncodeVersion(m_pSM->GetKind(), m_pSM->GetMajor(), m_pSM->GetMinor()), DXIL::MakeDxilVersion(1, 0), bitcodeSize);
+  // d. Trailer. Pad to 16 bytes.
+  while (DxilBitcode.size() & 0xF) {
+    DxilBitcode.push_back(0);
+  }
+
+  IFTBOOL(DxilBitcode.size_in_bytes() < UINT_MAX && (DxilBitcode.size_in_bytes() & 0xF) == 0, DXC_E_DATA_TOO_LARGE);
+}
+
+} // namespace hlsl
+
+// Factory entry point: allocates a DxbcConverter with the thread's allocator
+// and returns the interface identified by riid through ppv.
+// Returns the QueryInterface result; allocation failure and C++ exceptions are
+// converted to an HRESULT by the IFROOM / CATCH_CPP_RETURN_HRESULT macros.
+HRESULT CreateDxbcConverter(_In_ REFIID riid, _Out_ LPVOID *ppv) {
+  try {
+    CComPtr<hlsl::DxbcConverter> pConverter(
+        hlsl::DxbcConverter::Alloc(DxcGetThreadMallocNoRef()));
+    IFROOM(pConverter.p);
+    return pConverter.p->QueryInterface(riid, ppv);
+  }
+  CATCH_CPP_RETURN_HRESULT();
+}
+
+

+ 634 - 0
projects/dxilconv/lib/DxbcConverter/DxbcConverterImpl.h

@@ -0,0 +1,634 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxbcConverterImpl.h                                                       //
+// Copyright (c) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Utilities to convert from DXBC to DXIL.                                   //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+#pragma once
+
+#include "dxc/DXIL/DXIL.h"
+#include "dxc/DxilContainer/DxilContainer.h"
+#include "dxc/DxilContainer/DxilContainerReader.h"
+#include "llvm/Analysis/ReducibilityAnalysis.h"
+#include "dxc/Support/Global.h"
+
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Bitcode/BitstreamWriter.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+
+#include <atlbase.h>
+#include "dxc/Support/microcom.h"
+#include "Support/DXIncludes.h"
+
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MSFileSystem.h"
+#include "dxc/Support/FileIOHelper.h"
+#include "dxc/dxcapi.h"
+
+#include "DxbcUtil.h"
+#include "DxbcConverter.h"
+
+#include "dxc/DxilContainer/DxilPipelineStateValidation.h"
+
+#include "Tracing/DxcRuntimeEtw.h"
+
+#include <vector>
+#include <map>
+#include <algorithm>
+
+#pragma once
+// Make the legacy pass-manager classes visible directly in namespace llvm, so
+// code that pulls in llvm (see the using-directive below) can name them
+// without the legacy:: qualifier.
+namespace llvm {
+using legacy::PassManagerBase;
+using legacy::PassManager;
+using legacy::FunctionPassManager;
+}
+
+// NOTE(review): these using-directives/declarations sit at file scope in a
+// header, so they apply to every file that includes it.
+using namespace llvm;
+using std::string;
+using std::wstring;
+using std::pair;
+using std::vector;
+using std::map;
+using std::unique_ptr;
+
+
+// One signature element as passed to the in-driver conversion path
+// (ConvertInDriverImpl takes arrays of these for the input, output and
+// patch-constant signatures).
+// NOTE(review): presumably mirrors the D3D12 DDI structure of the same name;
+// field order and types should stay in sync with it -- confirm.
+struct D3D12DDIARG_SIGNATURE_ENTRY_0012 {
+  D3D10_SB_NAME SystemValue;                              // System-value name of the element.
+  UINT Register;                                          // Register index.
+  BYTE Mask;                                              // Component mask.
+  BYTE Stream;                                            // Output stream index.
+  D3D10_SB_REGISTER_COMPONENT_TYPE RegisterComponentType; // Component data type.
+  D3D11_SB_OPERAND_MIN_PRECISION   MinPrecision;          // Min-precision annotation.
+};
+
+
+namespace hlsl {
+
+// Packs four characters into a 32-bit code with ch0 in the lowest byte and
+// ch3 in the highest byte. Only used to define the enum below; undefined
+// right after it.
+#define DXBC_FOURCC(ch0, ch1, ch2, ch3)                              \
+            ((UINT)(BYTE)(ch0) | ((UINT)(BYTE)(ch1) << 8) |   \
+            ((UINT)(BYTE)(ch2) << 16) | ((UINT)(BYTE)(ch3) << 24 ))
+
+// Four-character codes identifying the container parts this converter deals
+// with. Entries annotated "== DFCC_..." have the same value as the named DXIL
+// container part code.
+enum DXBCFourCC {
+  DXBC_GenericShader              = DXBC_FOURCC('S', 'H', 'D', 'R'),
+  DXBC_GenericShaderEx            = DXBC_FOURCC('S', 'H', 'E', 'X'),
+  DXBC_InputSignature             = DXBC_FOURCC('I', 'S', 'G', 'N'),
+  DXBC_InputSignature11_1         = DXBC_FOURCC('I', 'S', 'G', '1'), // == DFCC_InputSignature
+  DXBC_PatchConstantSignature     = DXBC_FOURCC('P', 'C', 'S', 'G'),
+  DXBC_PatchConstantSignature11_1 = DXBC_FOURCC('P', 'S', 'G', '1'), // == DFCC_PatchConstantSignature
+  DXBC_OutputSignature            = DXBC_FOURCC('O', 'S', 'G', 'N'),
+  DXBC_OutputSignature5           = DXBC_FOURCC('O', 'S', 'G', '5'),
+  DXBC_OutputSignature11_1        = DXBC_FOURCC('O', 'S', 'G', '1'), // == DFCC_OutputSignature
+  DXBC_ShaderFeatureInfo          = DXBC_FOURCC('S', 'F', 'I', '0'), // == DFCC_FeatureInfo
+  DXBC_RootSignature              = DXBC_FOURCC('R', 'T', 'S', '0'), // == DFCC_RootSignature
+  DXBC_DXIL                       = DXBC_FOURCC('D', 'X', 'I', 'L'), // == DFCC_DXIL
+  DXBC_PipelineStateValidation    = DXBC_FOURCC('P', 'S', 'V', '0'), // == DFCC_PipelineStateValidation
+};
+#undef DXBC_FOURCC
+
+
+/// Use this class to parse DXBC signatures.
+///
+/// Holds the DXIL signature being built together with the intermediate
+/// records (element records, register ranges, used elements) and the lookup
+/// maps from DXBC registers / system-generated-value operand types to
+/// signature element indices.
+class SignatureHelper {
+public:
+  // Signature elements.
+  DxilSignature m_Signature;
+
+  // Use this to represent signature element record that comes from either:
+  // (1) DXBC signature blob, or (2) DDI signature vector.
+  struct ElementRecord {
+    string SemanticName;
+    unsigned SemanticIndex;
+    unsigned StartRow;
+    unsigned StartCol;
+    unsigned Rows;
+    unsigned Cols;
+    unsigned Stream;
+    CompType ComponentType;
+  };
+  vector<ElementRecord> m_ElementRecords;
+
+  // Use this to represent register range declaration.
+  struct Range {
+    unsigned StartRow;
+    unsigned StartCol;
+    unsigned Rows;
+    unsigned Cols;
+    BYTE OutputStream;
+
+    // Default state: start positions are UINT_MAX and the extent is empty.
+    Range() : StartRow(UINT_MAX), StartCol(UINT_MAX), Rows(0), Cols(0), OutputStream(0) {}
+
+    unsigned GetStartRow() const { return StartRow; }
+    unsigned GetStartCol() const { return StartCol; }
+    // Inclusive end positions; computed in unsigned arithmetic, so they are
+    // only meaningful once Rows/Cols are non-zero.
+    unsigned GetEndRow() const { return StartRow + Rows - 1; }
+    unsigned GetEndCol() const { return StartCol + Cols - 1; }
+
+    // Strict ordering of ranges by (OutputStream, StartRow, StartCol).
+    struct LTRangeByStreamAndStartRowAndStartCol {
+      bool operator()(const Range &e1, const Range &e2) const {
+        if (e1.OutputStream < e2.OutputStream)
+          return true;
+        else if (e1.OutputStream == e2.OutputStream) {
+          if (e1.StartRow < e2.StartRow)
+            return true;
+          else if (e1.StartRow == e2.StartRow)
+            return e1.StartCol < e2.StartCol;
+          else
+            return false; // e1.StartRow > e2.StartRow
+        } else
+          return false; // e1.OutputStream > e2.OutputStream
+      }
+    };
+  };
+
+  vector<Range> m_Ranges;
+
+  // Use this to represent input/output/tessellation register declaration.
+  struct UsedElement {
+    unsigned Row;
+    unsigned StartCol;
+    unsigned Cols;
+    D3D_INTERPOLATION_MODE InterpolationMode;
+    D3D11_SB_OPERAND_MIN_PRECISION MinPrecision;
+    unsigned NumUnits;
+    BYTE OutputStream;
+
+    // Default state: positions are UINT_MAX, extent is empty, interpolation is
+    // undefined and min-precision is the default.
+    UsedElement() : Row(UINT_MAX), StartCol(UINT_MAX), Cols(0), 
+      InterpolationMode(D3D_INTERPOLATION_UNDEFINED), MinPrecision(D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT),
+      NumUnits(0), OutputStream(0) {}
+
+    // Strict ordering of used elements by (OutputStream, Row, StartCol).
+    struct LTByStreamAndStartRowAndStartCol {
+      bool operator()(const UsedElement &e1, const UsedElement &e2) const {
+        if (e1.OutputStream < e2.OutputStream)
+          return true;
+        else if (e1.OutputStream == e2.OutputStream) {
+          if (e1.Row < e2.Row)
+            return true;
+          else if (e1.Row == e2.Row)
+            return e1.StartCol < e2.StartCol;
+          else
+            return false; // e1.Row > e2.Row
+        } else
+          return false; // e1.OutputStream > e2.OutputStream
+      }
+    };
+  };
+
+  // Assume the vector is sorted by <stream,row,col>.
+  vector<UsedElement> m_UsedElements;
+
+  // Elements with stream, register and component.
+  // Map key ordered by (Stream, Reg, Comp).
+  struct RegAndCompAndStream {
+    unsigned Reg;
+    unsigned Comp;
+    unsigned Stream;
+    RegAndCompAndStream(unsigned r, unsigned c, unsigned s) : Reg(r), Comp(c), Stream(s) {}
+    bool operator<(const RegAndCompAndStream &o) const {
+      if (Stream < o.Stream)
+        return true;
+      else if (Stream == o.Stream) {
+        if (Reg < o.Reg)
+          return true;
+        else if (Reg == o.Reg)
+          return Comp < o.Comp;
+        else
+          return false;
+      } else
+        return false;
+    }
+  };
+  // Maps (register, component, stream) to an element index in m_Signature.
+  map<RegAndCompAndStream, unsigned> m_DxbcRegisterToSignatureElement;
+
+  // Looks up the element for (Reg, Comp) in stream 0.
+  // Returns nullptr if no element is mapped.
+  const DxilSignatureElement *GetElement(unsigned Reg, unsigned Comp) const {
+    const unsigned Stream = 0;
+    return GetElementWithStream(Reg, Comp, Stream);
+  }
+  // Looks up the element for (Reg, Comp) in the given stream.
+  // Returns nullptr if no element is mapped; otherwise asserts that the
+  // element is allocated and that Reg/Comp fall inside its row/column extent.
+  const DxilSignatureElement *GetElementWithStream(unsigned Reg, unsigned Comp, unsigned Stream) const {
+    RegAndCompAndStream Key(Reg, Comp, Stream);
+    auto it = m_DxbcRegisterToSignatureElement.find(Key);
+    if (it == m_DxbcRegisterToSignatureElement.end()) {
+      return nullptr;
+    }
+    unsigned ElemIdx = it->second;
+    const DxilSignatureElement *E = &m_Signature.GetElement(ElemIdx);
+    DXASSERT(E->IsAllocated(), "otherwise signature elements were not set correctly");
+    DXASSERT(E->GetStartRow() <= (int)Reg && (int)Reg < E->GetStartRow()+E->GetRows(), "otherwise signature elements were not set correctly");
+    DXASSERT(E->GetStartCol() <= (int)Comp && (int)Comp < E->GetStartCol()+E->GetCols(), "otherwise signature elements were not set correctly");
+    return E;
+  }
+
+  // Elements that are System Generated Values (SGVs), without register.
+  map<D3D10_SB_OPERAND_TYPE, unsigned> m_DxbcSgvToSignatureElement;
+
+  // Looks up the SGV element for the given operand type. The type must already
+  // be in the map (only asserted, not checked, before the iterator is
+  // dereferenced); the element is asserted to be unallocated.
+  const DxilSignatureElement *GetElement(D3D10_SB_OPERAND_TYPE SgvRegType) const {
+    DXASSERT(m_DxbcSgvToSignatureElement.find(SgvRegType) != m_DxbcSgvToSignatureElement.end(), "otherwise the element has not been added to the map");
+    unsigned ElemIdx = m_DxbcSgvToSignatureElement.find(SgvRegType)->second;
+    const DxilSignatureElement *E = &m_Signature.GetElement(ElemIdx);
+    DXASSERT(!E->IsAllocated(), "otherwise signature elements were not set correctly");
+    return E;
+  }
+
+  bool IsInput() const { return m_Signature.IsInput(); }
+  bool IsOutput() const { return m_Signature.IsOutput(); }
+
+  // Special case SGVs that are not in the signature.
+  bool m_bHasInputCoverage;
+  bool m_bHasInnerInputCoverage;
+
+  // Creates an empty helper for the given shader and signature kind; the
+  // underlying DxilSignature is constructed with useMinPrecision == false.
+  SignatureHelper(DXIL::ShaderKind shaderKind, DXIL::SignatureKind sigKind)
+    : m_Signature(shaderKind, sigKind, /*useMinPrecision*/false)
+    , m_bHasInputCoverage(false)
+    , m_bHasInnerInputCoverage(false) {}
+};
+
+
+/// Use this class to implement the IDxbcConverter interface for DXBC to DXIL translation.
+class DxbcConverter : public IDxbcConverter {
+protected:
+  DXC_MICROCOM_TM_REF_FIELDS();
+public:
+  DXC_MICROCOM_TM_ADDREF_RELEASE_IMPL();
+
+  HRESULT STDMETHODCALLTYPE QueryInterface(REFIID iid, LPVOID *ppv) {
+    return DoBasicQueryInterface<IDxbcConverter>(this, iid, ppv);
+  }
+
+  DxbcConverter();
+  DxbcConverter(IMalloc *pMalloc) : DxbcConverter() { m_pMalloc = pMalloc; }
+  DXC_MICROCOM_TM_ALLOC(DxbcConverter);
+
+  ~DxbcConverter();
+
+  __override HRESULT STDMETHODCALLTYPE Convert(_In_reads_bytes_(DxbcSize) LPCVOID pDxbc,
+                                               _In_ UINT32 DxbcSize,
+                                               _In_opt_z_ LPCWSTR pExtraOptions,
+                                               _Outptr_result_bytebuffer_maybenull_(*pDxilSize) LPVOID *ppDxil,
+                                               _Out_ UINT32 *pDxilSize,
+                                               _Outptr_result_maybenull_z_ LPWSTR *ppDiag); 
+
+  __override HRESULT STDMETHODCALLTYPE ConvertInDriver(_In_reads_bytes_(8) const UINT32 *pBytecode,
+                                                       _In_opt_z_ LPCVOID pInputSignature,
+                                                       _In_ UINT32 NumInputSignatureElements,
+                                                       _In_opt_z_ LPCVOID pOutputSignature,
+                                                       _In_ UINT32 NumOutputSignatureElements,
+                                                       _In_opt_z_ LPCVOID pPatchConstantSignature,
+                                                       _In_ UINT32 NumPatchConstantSignatureElements,
+                                                       _In_opt_z_ LPCWSTR pExtraOptions,
+                                                       _Out_ IDxcBlob **ppDxilModule,
+                                                       _Outptr_result_maybenull_z_ LPWSTR *ppDiag);
+
+protected:
+  LLVMContext m_Ctx;
+  DxilModule *m_pPR;
+  std::unique_ptr<Module> m_pModule;
+  OP *m_pOP;
+  const ShaderModel *m_pSM;
+  unsigned m_DxbcMajor;
+  unsigned m_DxbcMinor;
+  bool IsSM51Plus() const { return m_DxbcMajor > 5 || (m_DxbcMajor == 5 && m_DxbcMinor >= 1); }
+  std::unique_ptr< IRBuilder<> > m_pBuilder;
+  
+  bool m_bDisableHashCheck;
+  bool m_bRunDxilCleanup;
+
+  bool m_bLegacyCBufferLoad;
+
+  unique_ptr<SignatureHelper> m_pInputSignature;
+  unique_ptr<SignatureHelper> m_pOutputSignature;
+  unique_ptr<SignatureHelper> m_pPatchConstantSignature;
+  D3D10_SB_OPERAND_TYPE m_DepthRegType;
+  bool m_bHasStencilRef;
+  bool m_bHasCoverageOut;
+
+  const unsigned kRegCompAlignment = 4;
+  const unsigned kLegacyCBufferRegSizeInBytes = 16;
+
+  Value *m_pUnusedF32;
+  Value *m_pUnusedI32;
+
+  // Temporary r-registers.
+  unsigned m_NumTempRegs;
+
+  // Indexable temporary registers.
+  struct IndexableReg {
+    Value *pValue32;
+    Value *pValue16;
+    unsigned NumRegs;
+    unsigned NumComps;
+    bool bIsAlloca;
+  };
+  map<unsigned, IndexableReg> m_IndexableRegs;
+  map<unsigned, IndexableReg> m_PatchConstantIndexableRegs;
+
+  // Shader resource register/rangeID maps.
+  map<unsigned, unsigned> m_SRVRangeMap;
+  map<unsigned, unsigned> m_UAVRangeMap;
+  map<unsigned, unsigned> m_CBufferRangeMap;
+  map<unsigned, unsigned> m_SamplerRangeMap;
+
+  // Immediate constant buffer.
+  GlobalVariable *m_pIcbGV;
+
+  // Control flow.
+  struct Scope {
+    enum Kind : unsigned { Function, If, Loop, Switch, HullLoop, LastKind };
+
+    enum Kind Kind;
+    BasicBlock *pPreScopeBB;
+    BasicBlock *pPostScopeBB;
+    unsigned NameIndex;
+
+    union {
+      // If
+      struct {
+        BasicBlock *pThenBB;
+        BasicBlock *pElseBB;
+        Value *pCond;
+      };
+
+      // Loop
+      struct {
+        BasicBlock *pLoopBB;
+        unsigned ContinueIndex;
+        unsigned LoopBreakIndex;
+      };
+
+      // Switch
+      struct {
+        BasicBlock *pDefaultBB;
+        Value *pSelector;
+        unsigned CaseGroupIndex;
+        unsigned SwitchBreakIndex;
+      };
+
+      // Function
+      struct {
+        unsigned LabelIdx;
+        unsigned CallIdx;
+        unsigned ReturnTokenOffset;
+        unsigned ReturnIndex;
+        bool bEntryFunc;
+      };
+
+      // HullLoop
+      struct {
+        BasicBlock *pHullLoopBB;
+        unsigned HullLoopBreakIndex;
+        Value *pInductionVar;
+        unsigned HullLoopTripCount;
+      };
+    };
+    vector<pair<unsigned, BasicBlock*> > SwitchCases;  // Switch
+
+    Scope() : Kind(Kind::Function), pPreScopeBB(nullptr), pPostScopeBB(nullptr), NameIndex(0), 
+              pThenBB(nullptr), pElseBB(nullptr), pCond(nullptr),
+              pLoopBB(nullptr), ContinueIndex(0), LoopBreakIndex(0),
+              pDefaultBB(nullptr), pSelector(nullptr), CaseGroupIndex(0), SwitchBreakIndex(0),
+              LabelIdx(0), CallIdx(0), ReturnTokenOffset(0), ReturnIndex(0), bEntryFunc(false),
+              pHullLoopBB(nullptr), HullLoopBreakIndex(0), pInductionVar(nullptr), HullLoopTripCount(0) {}
+
+    void SetEntry(bool b = true) { DXASSERT_NOMSG(Kind==Function); bEntryFunc = b; }
+    bool IsEntry() const { DXASSERT_NOMSG(Kind==Function); return bEntryFunc; }
+  };
+
+  class ScopeStack {
+  public:
+    ScopeStack();
+    Scope &Top();
+    Scope &Push(enum Scope::Kind Kind, BasicBlock *pPreScopeBB);
+    void Pop();
+    bool IsEmpty() const;
+    Scope &FindParentLoop();
+    Scope &FindParentLoopOrSwitch();
+    Scope &FindParentFunction();
+    Scope &FindParentHullLoop();
+
+  private:
+    vector<Scope> m_Scopes;
+    unsigned m_FuncCount;
+    unsigned m_IfCount;
+    unsigned m_LoopCount;
+    unsigned m_SwitchCount;
+    unsigned m_HullLoopCount;
+  };
+  ScopeStack m_ScopeStack;
+
+  struct LabelEntry {
+    Function *pFunc;
+  };
+  map<unsigned, LabelEntry> m_Labels;
+  map<unsigned, LabelEntry> m_InterfaceFunctionBodies;
+  bool HasLabels() { return !m_Labels.empty() || !m_InterfaceFunctionBodies.empty(); }
+
+  // Shared memory.
+  struct TGSMEntry {
+    GlobalVariable *pVar;
+    unsigned Stride;
+    unsigned Count;
+    unsigned Id;
+  };
+  map<unsigned, TGSMEntry> m_TGSMMap;
+  unsigned m_TGSMCount;
+
+  // Geometry shader.
+  unsigned GetGSTempRegForOutputReg(unsigned OutputReg) const;
+
+  // Hull shader.
+  bool m_bControlPointPhase;
+  bool m_bPatchConstantPhase;
+  vector<unsigned> m_PatchConstantPhaseInstanceCounts;
+
+  CMask m_PreciseMask;
+
+  // Interfaces
+  DxilCBuffer* m_pInterfaceDataBuffer;
+  DxilCBuffer* m_pClassInstanceCBuffers;
+  DxilSampler* m_pClassInstanceSamplers;
+  DxilSampler* m_pClassInstanceComparisonSamplers;
+
+  // Key type of m_ClassInstanceSRVs: a resource kind plus one kind-dependent field.
+  struct InterfaceShaderResourceKey {
+    DxilResource::Kind Kind;
+    union {
+      DXIL::ComponentType TypedSRVRet;  // secondary key for kinds other than StructuredBuffer/RawBuffer
+      unsigned StructureByteStride;     // secondary key when Kind is StructuredBuffer
+    };
+    // Orders by Kind first, then by the union member that applies to that kind.
+    // RawBuffer keys have no secondary field, so two RawBuffer keys are equivalent.
+    bool operator<(const InterfaceShaderResourceKey &o) const {
+      if (Kind != o.Kind)
+        return Kind < o.Kind;
+      switch (Kind) {
+      case DxilResource::Kind::StructuredBuffer:
+        return StructureByteStride < o.StructureByteStride;
+      case DxilResource::Kind::RawBuffer:
+        return false;
+      default:
+        return TypedSRVRet < o.TypedSRVRet;
+      }
+    }
+  };
+  map<InterfaceShaderResourceKey, unsigned> m_ClassInstanceSRVs;
+  map<unsigned, vector<unsigned>> m_FunctionTables;
+  // Value type of m_Interfaces (keyed by unsigned).
+  struct Interface {
+    vector<unsigned> Tables;        // NOTE(review): presumably keys into m_FunctionTables -- confirm
+    bool bDynamicallyIndexed;
+    unsigned NumArrayEntries;
+  };
+  map<unsigned, Interface> m_Interfaces;
+  unsigned m_NumIfaces;
+  unsigned m_FcallCount;
+
+protected:
+  virtual void ConvertImpl(_In_reads_bytes_(DxbcSize) LPCVOID pDxbc,
+                           _In_ UINT32 DxbcSize,
+                           _In_opt_z_ LPCWSTR pExtraOptions,
+                           _Outptr_result_bytebuffer_maybenull_(*pDxilSize) LPVOID *ppDxil,
+                           _Out_ UINT32 *pDxilSize,
+                           _Outptr_result_maybenull_z_ LPWSTR *ppDiag);
+
+  virtual void ConvertInDriverImpl(_In_reads_bytes_(8) const UINT32 *pByteCode,
+                           _In_opt_ const D3D12DDIARG_SIGNATURE_ENTRY_0012 *pInputSignature,
+                           _In_ UINT32 NumInputSignatureElements,
+                           _In_opt_ const D3D12DDIARG_SIGNATURE_ENTRY_0012 *pOutputSignature,
+                           _In_ UINT32 NumOutputSignatureElements,
+                           _In_opt_ const D3D12DDIARG_SIGNATURE_ENTRY_0012 *pPatchConstantSignature,
+                           _In_ UINT32 NumPatchConstantSignatureElements,
+                           _In_opt_z_ LPCWSTR pExtraOptions,
+                           _Out_ IDxcBlob **ppDxcBlob,
+                           _Outptr_result_maybenull_z_ LPWSTR *ppDiag);
+
+  virtual void LogConvertResult(bool InDriver,
+                           _In_ const LARGE_INTEGER *pQPCConvertStart,
+                           _In_ const LARGE_INTEGER *pQPCConvertEnd,
+                           _In_reads_bytes_(DxbcSize) LPCVOID pDxbc,
+                           _In_ UINT32 DxbcSize,
+                           _In_opt_z_ LPCWSTR pExtraOptions,
+                           _In_reads_bytes_(ConvertedSize) LPCVOID pConverted,
+                           _In_opt_ UINT32 ConvertedSize,
+                           HRESULT hr);
+
+
+  // Callbacks added to support conversion of custom intrinsics.
+  virtual void HandleUnknownInstruction(D3D10ShaderBinary::CInstruction &Inst);
+  virtual unsigned GetResourceSlot(D3D10ShaderBinary::CInstruction &Inst);
+
+protected:
+  void ParseExtraOptions(const wchar_t *pStr);
+
+  void AnalyzeShader(D3D10ShaderBinary::CShaderCodeParser &Parser);
+
+  void ExtractInputSignatureFromDXBC(DxilContainerReader &dxbcReader, const void *pMaxPtr);
+  void ExtractOutputSignatureFromDXBC(DxilContainerReader &dxbcReader, const void *pMaxPtr);
+  void ExtractPatchConstantSignatureFromDXBC(DxilContainerReader &dxbcReader, const void *pMaxPtr);
+  void ExtractSignatureFromDXBC(const D3D10_INTERNALSHADER_SIGNATURE *pSig, UINT uElemSize,
+                                const void *pMaxPtr, SignatureHelper &SigHelper);
+  void ExtractSignatureFromDDI(const D3D12DDIARG_SIGNATURE_ENTRY_0012 *pElements, unsigned NumElements, SignatureHelper &SigHelper);
+  /// Correlates information from decls and signature element records to create DXIL signature element.
+  void ConvertSignature(SignatureHelper &SigHelper, DxilSignature &Sig);
+
+  void ConvertInstructions(D3D10ShaderBinary::CShaderCodeParser &Parser);
+  void AdvanceDxbcInstructionStream(D3D10ShaderBinary::CShaderCodeParser &Parser,
+                                    D3D10ShaderBinary::CInstruction &Inst, 
+                                    bool &bDoneParsing);
+  bool GetNextDxbcInstruction(D3D10ShaderBinary::CShaderCodeParser &Parser, D3D10ShaderBinary::CInstruction &NextInst);
+  void InsertSM50ResourceHandles();
+  void InsertInterfacesResourceDecls();
+  const DxilResource& GetInterfacesSRVDecl(D3D10ShaderBinary::CInstruction &Inst);
+  void DeclareIndexableRegisters();
+  void CleanupIndexableRegisterDecls(map<unsigned, IndexableReg> &IdxRegMap);
+  void RemoveUnreachableBasicBlocks();
+  void CleanupGEP();
+  
+  void ConvertUnary(OP::OpCode OpCode, const CompType &ElementType, D3D10ShaderBinary::CInstruction &Inst, 
+                    const unsigned DstIdx = 0, const unsigned SrcIdx = 1);
+  void ConvertBinary(OP::OpCode OpCode, const CompType &ElementType, D3D10ShaderBinary::CInstruction &Inst, 
+                     const unsigned DstIdx = 0, const unsigned SrcIdx1 = 1, const unsigned SrcIdx2 = 2);
+  void ConvertBinary(Instruction::BinaryOps OpCode, const CompType &ElementType, D3D10ShaderBinary::CInstruction &Inst, 
+                     const unsigned DstIdx = 0, const unsigned SrcIdx1 = 1, const unsigned SrcIdx2 = 2);
+  void ConvertBinaryWithTwoOuts(OP::OpCode OpCode, D3D10ShaderBinary::CInstruction &Inst, 
+                                const unsigned DstIdx1 = 0, const unsigned DstIdx2 = 1,
+                                const unsigned SrcIdx1 = 2, const unsigned SrcIdx2 = 3);
+  void ConvertBinaryWithCarry(OP::OpCode OpCode, D3D10ShaderBinary::CInstruction &Inst, 
+                              const unsigned DstIdx1 = 0, const unsigned DstIdx2 = 1,
+                              const unsigned SrcIdx1 = 2, const unsigned SrcIdx2 = 3);
+  void ConvertTertiary(OP::OpCode OpCode, const CompType &ElementType, D3D10ShaderBinary::CInstruction &Inst, 
+                       const unsigned DstIdx = 0,
+                       const unsigned SrcIdx1 = 1, const unsigned SrcIdx2 = 2, const unsigned SrcIdx3 = 3);
+  void ConvertQuaternary(OP::OpCode OpCode, const CompType &ElementType, D3D10ShaderBinary::CInstruction &Inst, 
+                         const unsigned DstIdx = 0,
+                         const unsigned SrcIdx1 = 1, const unsigned SrcIdx2 = 2,
+                         const unsigned SrcIdx3 = 3, const unsigned SrcIdx4 = 4);
+  void ConvertComparison(CmpInst::Predicate Predicate, const CompType &ElementType, D3D10ShaderBinary::CInstruction &Inst, 
+                         const unsigned DstIdx = 0, const unsigned SrcIdx1 = 1, const unsigned SrcIdx2 = 2);
+  void ConvertDotProduct(OP::OpCode OpCode, const BYTE NumComps, const CMask &LoadMask, D3D10ShaderBinary::CInstruction &Inst);
+  void ConvertCast(const CompType &SrcElementType, const CompType &DstElementType, D3D10ShaderBinary::CInstruction &Inst, 
+                   const unsigned DstIdx = 0, const unsigned SrcIdx = 1);
+  void ConvertToDouble(const CompType &SrcElementType, D3D10ShaderBinary::CInstruction &Inst);
+  void ConvertFromDouble(const CompType &DstElementType, D3D10ShaderBinary::CInstruction &Inst);
+  void LoadCommonSampleInputs(D3D10ShaderBinary::CInstruction &Inst, Value *pArgs[], bool bSetOffsets = true);
+  void StoreResRetOutputAndStatus(D3D10ShaderBinary::CInstruction &Inst, Value *pResRet, CompType DstType);
+  void StoreGetDimensionsOutput(D3D10ShaderBinary::CInstruction &Inst, Value *pGetDimRet);
+  void StoreSamplePosOutput(D3D10ShaderBinary::CInstruction &Inst, Value *pSamplePosVal);
+  void StoreBroadcastOutput(D3D10ShaderBinary::CInstruction &Inst, Value *pValue, CompType DstType);
+  Value *GetCoordValue(D3D10ShaderBinary::CInstruction &Inst, const unsigned uCoordIdx);
+  Value *GetByteOffset(D3D10ShaderBinary::CInstruction &Inst, const unsigned Idx1, const unsigned Idx2, const unsigned Stride);
+  void ConvertLoadTGSM(D3D10ShaderBinary::CInstruction &Inst, const unsigned uOpTGSM, const unsigned uOpOutput, CompType SrcType, Value *pByteOffset);
+  void ConvertStoreTGSM(D3D10ShaderBinary::CInstruction &Inst, const unsigned uOpTGSM, const unsigned uOpValue, CompType BaseValueType, Value *pByteOffset);
+
+  void EmitGSOutputRegisterStore(unsigned StreamId);
+
+  void SetShaderGlobalFlags(unsigned GlobalFlags);
+  Value *CreateHandle(DxilResourceBase::Class Class, unsigned RangeID, Value *pIndex, bool bNonUniformIndex);
+
+  void Optimize();
+  void AddOptimizationPasses(PassManagerBase &PassManager, unsigned OptLevel);
+
+  void CheckDxbcString(const char *pStr, const void *pMaxPtrInclusive);
+
+  Value *LoadConstFloat(float& fVal);
+  void LoadOperand(OperandValue &SrcVal, D3D10ShaderBinary::CInstruction &Inst, const unsigned OpIdx, const CMask &Mask, const CompType &ValueType);
+  const DxilResource& LoadSRVOperand(OperandValue &SrcVal, D3D10ShaderBinary::CInstruction &Inst, const unsigned OpIdx, const CMask &Mask, const CompType &ValueType);
+  const DxilResource& GetSRVFromOperand(D3D10ShaderBinary::CInstruction &Inst, const unsigned OpIdx);
+  void StoreOperand(OperandValue &DstVal, const D3D10ShaderBinary::CInstruction &Inst, const unsigned OpIdx, const CMask &Mask, const CompType &ValueType);
+  Value *LoadOperandIndex(const D3D10ShaderBinary::COperandIndex &OpIndex, const D3D10_SB_OPERAND_INDEX_REPRESENTATION IndexType);
+  Value *LoadOperandIndexRelative(const D3D10ShaderBinary::COperandIndex &OpIndex);
+  /// Implicit casts of a value.
+  Value *CastDxbcValue(Value *pValue, const CompType &SrcType, const CompType &DstType);
+  Value *CreateBitCast(Value *pValue, const CompType &SrcType, const CompType &DstType);
+  Value *ApplyOperandModifiers(Value *pValue, const D3D10ShaderBinary::COperandBase &O);
+  void ApplyInstructionModifiers(OperandValue &DstVal, const D3D10ShaderBinary::CInstruction &Inst);
+  CompType InferOperandType(const D3D10ShaderBinary::CInstruction &Inst, const unsigned OpIdx, const CMask &Mask);
+
+  void CreateBranchIfNeeded(BasicBlock *pBB, BasicBlock *pTargetBB);
+  Value *LoadZNZCondition(D3D10ShaderBinary::CInstruction &Inst, const unsigned OpIdx);
+  D3D11_SB_OPERAND_MIN_PRECISION GetHigherPrecision(D3D11_SB_OPERAND_MIN_PRECISION p1, D3D11_SB_OPERAND_MIN_PRECISION p2);
+
+  string SynthesizeResGVName(const char *pNamePrefix, unsigned ID);
+  StructType *GetStructResElemType(unsigned StructSizeInBytes);
+  StructType *GetTypedResElemType(CompType CT);
+  UndefValue *DeclareUndefPtr(Type *pType, unsigned AddrSpace);
+  Value *MarkPrecise(Value *pVal, BYTE Comp = BYTE(-1));
+
+  void SerializeDxil(SmallVectorImpl<char> &DxilBitcode);
+
+};
+}

+ 1053 - 0
projects/dxilconv/lib/DxbcConverter/DxbcUtil.cpp

@@ -0,0 +1,1053 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxbcUtil.cpp                                                              //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Utilities to convert from DXBC to DXIL.                                   //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "dxc/Support/Global.h"
+#include "dxc/DXIL/DxilSampler.h"
+#include "dxc/DXIL/DxilResource.h"
+
+#include "llvm/Support/Casting.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Instructions.h"
+
+#include "Support/DXIncludes.h"
+#include "DxbcUtil.h"
+
+using namespace llvm;
+
+
+namespace hlsl {
+
+//------------------------------------------------------------------------------
+//
+//  CMask methods.
+//
+// Default constructor: starts with no component bits set.
+CMask::CMask() {
+  m_Mask = 0;
+}
+
+// Wraps a raw mask byte; asserts that it does not exceed DXBC::kAllCompMask.
+CMask::CMask(BYTE Mask) {
+  m_Mask = Mask;
+  DXASSERT(Mask <= DXBC::kAllCompMask, "otherwise the caller did not check");
+}
+
+// Builds a mask from four 0/1 flags; flag cN becomes bit N of the mask.
+CMask::CMask(BYTE c0, BYTE c1, BYTE c2, BYTE c3) {
+  DXASSERT(c0<=1 && c1<=1 && c2<=1 && c3<=1, "otherwise the caller did not check");
+  const BYTE Flags[4] = { c0, c1, c2, c3 };
+  m_Mask = 0;
+  for (BYTE Bit = 0; Bit < 4; Bit++) {
+    m_Mask |= (Flags[Bit] << Bit);
+  }
+}
+
+// Builds a mask with NumComp consecutive component bits set, starting at bit StartComp.
+CMask::CMask(BYTE StartComp, BYTE NumComp) {
+  // StartComp/NumComp are component indices/counts, so the range is bounded by the
+  // number of components (DXBC::kWidth), not by the kAllCompMask bit pattern.
+  // Written as a sum so that an empty range (NumComp == 0) needs no "-1" arithmetic.
+  DXASSERT((StartComp + NumComp) <= DXBC::kWidth, "otherwise the caller did not check");
+  m_Mask = 0;
+  for (BYTE c = StartComp; c < StartComp+NumComp; c++) {
+    m_Mask |= (1<<c);
+  }
+}
+
+// Returns the raw mask byte after asserting it is within DXBC::kAllCompMask.
+BYTE CMask::ToByte() const {
+  const BYTE Raw = m_Mask;
+  DXASSERT(Raw <= DXBC::kAllCompMask, "otherwise the caller did not check");
+  return Raw;
+}
+
+// File-local convenience: tests bit c of a raw mask byte via CMask::IsSet.
+static bool IsSet(BYTE Mask, BYTE c) {
+  const CMask Wrapped(Mask);
+  return Wrapped.IsSet(c);
+}
+
+// Returns true when component bit c is set in the mask.
+bool CMask::IsSet(BYTE c) const {
+  DXASSERT(c < DXBC::kWidth, "otherwise the caller did not check");
+  const bool bBitSet = ((m_Mask >> c) & 1) != 0;
+  return bBitSet;
+}
+
+// Sets component bit c in the mask; other bits are left unchanged.
+void CMask::Set(BYTE c) {
+  DXASSERT(c < DXBC::kWidth, "otherwise the caller did not check");
+  m_Mask |= (1<<c);
+}
+
+// Returns a new mask holding the union of the bits of both operands.
+CMask CMask::operator|(const CMask &o) {
+  const BYTE Combined = m_Mask | o.m_Mask;
+  return CMask(Combined);
+}
+
+// Counts how many of the first DXBC::kWidth bits are set in the mask.
+BYTE CMask::GetNumActiveComps() const {
+  DXASSERT(m_Mask <= DXBC::kAllCompMask, "otherwise the caller did not check");
+  BYTE Count = 0;
+  for (BYTE c = 0; c < DXBC::kWidth; c++) {
+    if (m_Mask & (1 << c))
+      Count++;
+  }
+  return Count;
+}
+
+// Returns the length of the span from the lowest to the highest set component
+// (inclusive), or 0 when no component bit is set.
+BYTE CMask::GetNumActiveRangeComps() const {
+  DXASSERT(m_Mask <= DXBC::kAllCompMask, "otherwise the caller did not check");
+  if ((m_Mask & DXBC::kAllCompMask) == 0)
+    return 0;
+
+  // Single pass: remember the first set bit once, keep overwriting the last.
+  BYTE Lowest = 0;
+  BYTE Highest = 0;
+  bool bFoundAny = false;
+  for (BYTE c = 0; c < DXBC::kWidth; c++) {
+    if ((m_Mask & (1 << c)) == 0)
+      continue;
+    if (!bFoundAny) {
+      Lowest = c;
+      bFoundAny = true;
+    }
+    Highest = c;
+  }
+
+  return Highest - Lowest + 1;
+}
+
+
+// Packs four 0/1 component flags into a raw mask byte.
+BYTE CMask::MakeMask(BYTE c0, BYTE c1, BYTE c2, BYTE c3) {
+  const CMask Packed(c0, c1, c2, c3);
+  return Packed.ToByte();
+}
+
+// Returns the mask built from DXBC::kAllCompMask.
+CMask CMask::MakeXYZWMask() {
+  const CMask Full(DXBC::kAllCompMask);
+  return Full;
+}
+
+// Returns a mask with the first n components set. Values of n other than 0..3
+// assert that n is 4 and yield the full four-component mask.
+CMask CMask::MakeFirstNCompMask(BYTE n) {
+  if (n == 0)
+    return CMask(0,0,0,0);
+  if (n == 1)
+    return CMask(1,0,0,0);
+  if (n == 2)
+    return CMask(1,1,0,0);
+  if (n == 3)
+    return CMask(1,1,1,0);
+
+  DXASSERT(n == 4, "otherwise the caller did not pass the right number of components");
+  return CMask(1,1,1,1);
+}
+
+// Returns a mask with only the bit for Component set.
+CMask CMask::MakeCompMask(BYTE Component) {
+  DXASSERT(Component < DXBC::kWidth, "otherwise the caller should have checked that the mask is non-zero");
+  const BYTE SingleBit = (BYTE)(1 << Component);
+  return CMask(SingleBit);
+}
+
+// Returns the single-component mask for component 0.
+CMask CMask::MakeXMask() {
+  const BYTE kFirstComp = 0;
+  return MakeCompMask(kFirstComp);
+}
+
+// A mask is accepted for doubles when it is exactly 0x3, 0xC or 0xF,
+// i.e. it selects whole pairs of components.
+bool CMask::IsValidDoubleMask(const CMask &Mask) {
+  switch (Mask.ToByte()) {
+  case 0x3:
+  case 0xC:
+  case 0xF:
+    return true;
+  default:
+    return false;
+  }
+}
+
+// Widens a mask by active-component count: 0 -> empty, 1 -> first two components,
+// 2 -> all four. Any other count asserts and returns the empty mask.
+CMask CMask::GetMaskForDoubleOperation(const CMask &Mask) {
+  const BYTE NumActive = Mask.GetNumActiveComps();
+  if (NumActive == 0)
+    return CMask(0,0,0,0);
+  if (NumActive == 1)
+    return CMask(1,1,0,0);
+  if (NumActive == 2)
+    return CMask(1,1,1,1);
+
+  DXASSERT(false, "otherwise missed a case");
+  return CMask();
+}
+
+// Returns the index of the lowest set component bit, or _UI8_MAX when none of
+// the first DXBC::kWidth bits is set.
+BYTE CMask::GetFirstActiveComp() const {
+  DXASSERT(m_Mask > 0, "otherwise the caller should have checked that the mask is non-zero");
+  BYTE c = 0;
+  while (c < DXBC::kWidth) {
+    if (m_Mask & (1 << c))
+      return c;
+    c++;
+  }
+  return _UI8_MAX;
+}
+
+// Builds a CMask from a DXBC mask value by shifting it right by
+// D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT.
+CMask CMask::FromDXBC(const unsigned DxbcMask) {
+  const unsigned Shifted = DxbcMask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT;
+  return CMask(Shifted);
+}
+
+
+//------------------------------------------------------------------------------
+//
+//  OperandValue methods.
+//
+// Starts with all four component values unset (nullptr).
+OperandValue::OperandValue() {
+  for (unsigned i = 0; i < 4; i++) {
+    m_pVal[i] = nullptr;
+  }
+}
+
+// Mutable access to the value slot of component c.
+OperandValue::PValue &OperandValue::operator[](BYTE c) {
+  DXASSERT_NOMSG(c < DXBC::kWidth);
+  PValue &Slot = m_pVal[c];
+  return Slot;
+}
+
+// Read-only access to the value slot of component c; asserts the slot has been set.
+const OperandValue::PValue &OperandValue::operator[](BYTE c) const {
+  DXASSERT_NOMSG(c < DXBC::kWidth);
+  const PValue &Slot = m_pVal[c];
+  DXASSERT(Slot != nullptr, "otherwise required component value has not been set");
+  return Slot;
+}
+
+//------------------------------------------------------------------------------
+//
+//  OperandValueHelper methods.
+//
+// Default state: no operand attached, iteration already finished, and every
+// component slot marked kBadComp.
+OperandValueHelper::OperandValueHelper()
+: m_pOpValue(nullptr)
+, m_Index(DXBC::kWidth) {
+  for (unsigned i = 0; i < 4; i++) {
+    m_Components[i] = kBadComp;
+  }
+}
+
+// Attaches the helper to OpValue and derives, for each component enabled in Mask,
+// which source component of operand O supplies it:
+//   - swizzle mode:  O.m_Swizzle is used as-is;
+//   - select-1 mode: every component reads O.m_ComponentName;
+//   - mask mode:     identity mapping (x, y, z, w).
+// Any other selection mode triggers DXASSERT_DXBC.
+OperandValueHelper::OperandValueHelper(OperandValue &OpValue, const CMask &Mask, const D3D10ShaderBinary::COperandBase &O)
+: m_pOpValue(&OpValue)
+, m_Index(DXBC::kWidth) {
+    // Begin in the "done" state so the helper stays well-defined on the default
+    // path in case DXASSERT_DXBC does not stop execution; Initialize() overwrites
+    // both m_Index and m_Components on every recognized path.
+    m_Components[0] = m_Components[1] = m_Components[2] = m_Components[3] = kBadComp;
+
+    switch (O.m_ComponentSelection) {
+    case D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE:
+      Initialize(Mask, O.m_Swizzle);
+      break;
+    case D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE: {
+      BYTE Swizzle[DXBC::kWidth] = { (BYTE)O.m_ComponentName, (BYTE)O.m_ComponentName, 
+                                     (BYTE)O.m_ComponentName, (BYTE)O.m_ComponentName };
+      Initialize(Mask, Swizzle);
+      break;
+    }
+    case D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE: {
+      BYTE Swizzle[DXBC::kWidth] = { (BYTE)D3D10_SB_4_COMPONENT_X, (BYTE)D3D10_SB_4_COMPONENT_Y, 
+                                     (BYTE)D3D10_SB_4_COMPONENT_Z, (BYTE)D3D10_SB_4_COMPONENT_W };
+      Initialize(Mask, Swizzle);
+      break;
+    }
+    default:
+      DXASSERT_DXBC(false);
+    }
+}
+
+// Records the source component for every component enabled in Mask (kBadComp for
+// the rest) and positions m_Index on the first slot that is not kBadComp.
+void OperandValueHelper::Initialize(const CMask &Mask, const BYTE CompSwizzle[DXBC::kWidth]) {
+  DXASSERT(Mask.GetNumActiveComps() > 0, "otherwise the caller passed incorrect mask");
+  m_Index = DXBC::kWidth;
+  for (BYTE c = 0; c < DXBC::kWidth; c++) {
+    DXASSERT(m_pOpValue->m_pVal[c] == nullptr, "otherwise the caller passed a stale/corrupt OpValue");
+    if (!Mask.IsSet(c)) {
+      m_Components[c] = kBadComp;
+      continue;
+    }
+    m_Components[c] = CompSwizzle[c];
+    // Remember only the first usable slot.
+    if (m_Index == DXBC::kWidth && m_Components[c] != kBadComp)
+      m_Index = c;
+  }
+  DXASSERT_NOMSG(m_Index < DXBC::kWidth);
+}
+
+// Returns the source component at the current position, or kBadComp once done.
+BYTE OperandValueHelper::GetComp() const {
+  if (m_Index < DXBC::kWidth)
+    return m_Components[m_Index];
+  return kBadComp;
+}
+
+// True once the current position has reached DXBC::kWidth.
+bool OperandValueHelper::IsDone() const {
+  const bool bExhausted = (m_Index == DXBC::kWidth);
+  return bExhausted;
+}
+
+// Moves to the next slot whose source component has not been visited yet.
+// Calling it when IsDone() is already true asserts and leaves the state unchanged.
+void OperandValueHelper::Advance() {
+  if (IsDone()) {
+    DXASSERT(false, "otherwise Advance got called past the last active component, which is not the intended use");
+    return;
+  }
+
+  // 1. Look for the next component that needs a value.
+  // 2. Disable m_Components[c] that are equal to Comp to iterate only through unique components.
+  BYTE Comp = m_Components[m_Index];
+  DXASSERT_NOMSG(Comp < DXBC::kWidth);
+  // Retire the current slot, then scan only the slots after it.
+  m_Components[m_Index] = kBadComp;
+  BYTE StartComp = m_Index + 1;
+  // Provisionally "done"; replaced below by the first remaining live slot, if any.
+  m_Index = DXBC::kWidth;
+  for (BYTE c = StartComp; c < DXBC::kWidth; c++) {
+    if (m_Components[c] == Comp) {
+      // Same source component as the one just consumed: retire it as well.
+      m_Components[c] = kBadComp;
+    } else if (m_Components[c] != kBadComp) {
+      // Keep scanning after the first hit so later duplicates of Comp are still retired.
+      if (m_Index == DXBC::kWidth)
+        m_Index = c;
+    }
+  }
+}
+
+// Stores pValue into the current slot and into every later slot that reads the
+// same source component.
+void OperandValueHelper::SetValue(llvm::Value *pValue) {
+  DXASSERT(m_Index < DXBC::kWidth, "otherwise the client uses the instance after all unique components have been set");
+  DXASSERT(m_pOpValue->m_pVal[m_Index] == nullptr, "otherwise the client tried to redefine a value, which is not the intended use");
+  const BYTE Target = m_Components[m_Index];
+  DXASSERT_NOMSG(Target < DXBC::kWidth);
+  for (BYTE c = m_Index; c < DXBC::kWidth; c++) {
+    if (m_Components[c] != Target)
+      continue;
+    DXASSERT_NOMSG(m_pOpValue->m_pVal[c] == nullptr);
+    m_pOpValue->m_pVal[c] = pValue;
+  }
+}
+
+
+//------------------------------------------------------------------------------
+//
+//  DXBC namespace functions.
+//
+namespace DXBC {
+
+// Maps a DXBC tokenized program type to the matching ShaderModel::Kind.
+// Any program type not listed below yields ShaderModel::Kind::Invalid.
+ShaderModel::Kind GetShaderModelKind(D3D10_SB_TOKENIZED_PROGRAM_TYPE Type) {
+  switch (Type) {
+  case D3D10_SB_PIXEL_SHADER:     return ShaderModel::Kind::Pixel;
+  case D3D10_SB_VERTEX_SHADER:    return ShaderModel::Kind::Vertex;
+  case D3D10_SB_GEOMETRY_SHADER:  return ShaderModel::Kind::Geometry;
+  case D3D11_SB_HULL_SHADER:      return ShaderModel::Kind::Hull;
+  case D3D11_SB_DOMAIN_SHADER:    return ShaderModel::Kind::Domain;
+  case D3D11_SB_COMPUTE_SHADER:   return ShaderModel::Kind::Compute;
+  default:                        return ShaderModel::Kind::Invalid;
+  }
+}
+
+// Predicates over a DXBC global-flags bitfield; each one tests a single flag constant.
+// Note the inverted sense of IsFlagDisableMathRefactoring: it is true when the
+// REFACTORING_ALLOWED bit is NOT set. IsFlagEnableMSAD tests the
+// ENABLE_SHADER_EXTENSIONS bit.
+bool IsFlagDisableOptimizations         (unsigned Flags) { return (Flags & D3D11_1_SB_GLOBAL_FLAG_SKIP_OPTIMIZATION) != 0; }
+bool IsFlagDisableMathRefactoring       (unsigned Flags) { return (Flags & D3D10_SB_GLOBAL_FLAG_REFACTORING_ALLOWED) == 0; }
+bool IsFlagEnableDoublePrecision        (unsigned Flags) { return (Flags & D3D11_SB_GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS) != 0; }
+bool IsFlagForceEarlyDepthStencil       (unsigned Flags) { return (Flags & D3D11_SB_GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL) != 0; }
+bool IsFlagEnableRawAndStructuredBuffers(unsigned Flags) { return (Flags & D3D11_SB_GLOBAL_FLAG_ENABLE_RAW_AND_STRUCTURED_BUFFERS) != 0; }
+bool IsFlagEnableMinPrecision           (unsigned Flags) { return (Flags & D3D11_1_SB_GLOBAL_FLAG_ENABLE_MINIMUM_PRECISION) != 0; }
+bool IsFlagEnableDoubleExtensions       (unsigned Flags) { return (Flags & D3D11_1_SB_GLOBAL_FLAG_ENABLE_DOUBLE_EXTENSIONS) != 0; }
+bool IsFlagEnableMSAD                   (unsigned Flags) { return (Flags & D3D11_1_SB_GLOBAL_FLAG_ENABLE_SHADER_EXTENSIONS) != 0; }
+bool IsFlagAllResourcesBound            (unsigned Flags) { return (Flags & D3D12_SB_GLOBAL_FLAG_ALL_RESOURCES_BOUND) != 0; }
+
+// Maps a D3D_INTERPOLATION_MODE value to the matching InterpolationMode::Kind.
+// A mode that is not listed asserts and yields InterpolationMode::Kind::Invalid.
+InterpolationMode::Kind GetInterpolationModeKind(D3D_INTERPOLATION_MODE Mode) {
+  switch (Mode) {
+  case D3D_INTERPOLATION_UNDEFINED:                     return InterpolationMode::Kind::Undefined;
+  case D3D_INTERPOLATION_CONSTANT:                      return InterpolationMode::Kind::Constant;
+  case D3D_INTERPOLATION_LINEAR:                        return InterpolationMode::Kind::Linear;
+  case D3D_INTERPOLATION_LINEAR_CENTROID:               return InterpolationMode::Kind::LinearCentroid;
+  case D3D_INTERPOLATION_LINEAR_NOPERSPECTIVE:          return InterpolationMode::Kind::LinearNoperspective;
+  case D3D_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID: return InterpolationMode::Kind::LinearNoperspectiveCentroid;
+  case D3D_INTERPOLATION_LINEAR_SAMPLE:                 return InterpolationMode::Kind::LinearSample;
+  case D3D_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE:   return InterpolationMode::Kind::LinearNoperspectiveSample;
+  }
+  DXASSERT(false, "otherwise the caller did not check the range");
+  return InterpolationMode::Kind::Invalid;
+}
+
+// Maps a semantic kind to the DXBC operand type of its dedicated register.
+// IsOutput is consulted only for Coverage (output vs. input coverage mask).
+// A semantic that is not listed asserts and yields D3D10_SB_OPERAND_TYPE_TEMP.
+D3D10_SB_OPERAND_TYPE GetOperandRegType(Semantic::Kind Kind, bool IsOutput) {
+  switch (Kind) {
+  case Semantic::Kind::Coverage:
+    if (IsOutput) return D3D10_SB_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
+    else          return D3D11_SB_OPERAND_TYPE_INPUT_COVERAGE_MASK;
+  case Semantic::Kind::InnerCoverage:     return D3D11_SB_OPERAND_TYPE_INNER_COVERAGE;
+  case Semantic::Kind::PrimitiveID:       return D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID;
+  case Semantic::Kind::Depth:             return D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH;
+  case Semantic::Kind::DepthLessEqual:    return D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL;
+  case Semantic::Kind::DepthGreaterEqual: return D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL;
+  case Semantic::Kind::StencilRef:        return D3D11_SB_OPERAND_TYPE_OUTPUT_STENCIL_REF;
+  }
+  DXASSERT(false, "otherwise the caller passed wrong semantic type");
+  return D3D10_SB_OPERAND_TYPE_TEMP;
+}
+
+// Maps a DXBC resource dimension to the matching DxilResource::Kind.
+// UNKNOWN maps to Kind::Invalid; a dimension that is not listed asserts and also
+// yields Kind::Invalid.
+DxilResource::Kind GetResourceKind(D3D10_SB_RESOURCE_DIMENSION ResType) {
+  switch (ResType) {
+  case D3D10_SB_RESOURCE_DIMENSION_UNKNOWN:           return DxilResource::Kind::Invalid;
+  case D3D10_SB_RESOURCE_DIMENSION_BUFFER:            return DxilResource::Kind::TypedBuffer;
+  case D3D10_SB_RESOURCE_DIMENSION_TEXTURE1D:         return DxilResource::Kind::Texture1D;
+  case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2D:         return DxilResource::Kind::Texture2D;
+  case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMS:       return DxilResource::Kind::Texture2DMS;
+  case D3D10_SB_RESOURCE_DIMENSION_TEXTURE3D:         return DxilResource::Kind::Texture3D;
+  case D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBE:       return DxilResource::Kind::TextureCube;
+  case D3D10_SB_RESOURCE_DIMENSION_TEXTURE1DARRAY:    return DxilResource::Kind::Texture1DArray;
+  case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DARRAY:    return DxilResource::Kind::Texture2DArray;
+  case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMSARRAY:  return DxilResource::Kind::Texture2DMSArray;
+  case D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBEARRAY:  return DxilResource::Kind::TextureCubeArray;
+  case D3D11_SB_RESOURCE_DIMENSION_RAW_BUFFER:        return DxilResource::Kind::RawBuffer;
+  // Structured buffers have their own DXIL kind; folding them into RawBuffer would
+  // lose the distinction that code keyed on Kind::StructuredBuffer relies on.
+  case D3D11_SB_RESOURCE_DIMENSION_STRUCTURED_BUFFER: return DxilResource::Kind::StructuredBuffer;
+  }
+  DXASSERT(false, "otherwise the caller did not check the range");
+  return DxilResource::Kind::Invalid;
+}
+
+// Returns the number of coordinate components for a resource kind, per the table
+// below. Kinds that are not listed (StructuredBuffer among them) assert and yield 0.
+BYTE GetNumResCoords(DxilResource::Kind ResKind) {
+  switch (ResKind) {
+  case DxilResource::Kind::Texture1D:         return 1;
+  case DxilResource::Kind::Texture2D:         return 2;
+  case DxilResource::Kind::Texture2DMS:       return 2;
+  case DxilResource::Kind::Texture3D:         return 3;
+  case DxilResource::Kind::TextureCube:       return 3;
+  case DxilResource::Kind::Texture1DArray:    return 2;
+  case DxilResource::Kind::Texture2DArray:    return 3;
+  case DxilResource::Kind::Texture2DMSArray:  return 3;
+  case DxilResource::Kind::TextureCubeArray:  return 4;
+  case DxilResource::Kind::TypedBuffer:       return 1;
+  case DxilResource::Kind::RawBuffer:         return 1;
+  }
+  DXASSERT(false, "otherwise the caller did not pass correct resource kind");
+  return 0;
+}
+
+// Returns the number of offset components for a resource kind, per the table
+// below. In this table each array kind has one fewer offset than coordinates in
+// GetNumResCoords, and buffer kinds have none. Kinds that are not listed assert and yield 0.
+BYTE GetNumResOffsets(DxilResource::Kind ResKind) {
+  switch (ResKind) {
+  case DxilResource::Kind::Texture1D:         return 1;
+  case DxilResource::Kind::Texture2D:         return 2;
+  case DxilResource::Kind::Texture2DMS:       return 2;
+  case DxilResource::Kind::Texture3D:         return 3;
+  case DxilResource::Kind::TextureCube:       return 3;
+  case DxilResource::Kind::Texture1DArray:    return 1;
+  case DxilResource::Kind::Texture2DArray:    return 2;
+  case DxilResource::Kind::Texture2DMSArray:  return 2;
+  case DxilResource::Kind::TextureCubeArray:  return 3;
+  case DxilResource::Kind::TypedBuffer:       return 0;
+  case DxilResource::Kind::RawBuffer:         return 0;
+  }
+  DXASSERT(false, "otherwise the caller did not pass correct resource kind");
+  return 0;
+}
+
+// Maps a signature register component type to F32 / I32 / U32.
+// Any other value asserts and yields a default-constructed CompType.
+CompType GetCompType(D3D_REGISTER_COMPONENT_TYPE CompTy) {
+  if (CompTy == D3D_REGISTER_COMPONENT_FLOAT32)
+    return CompType::getF32();
+  if (CompTy == D3D_REGISTER_COMPONENT_SINT32)
+    return CompType::getI32();
+  if (CompTy == D3D_REGISTER_COMPONENT_UINT32)
+    return CompType::getU32();
+
+  DXASSERT(false, "incorrect component type value");
+  return CompType();
+}
+
+// Combines a register component type with an operand min-precision:
+//   - DEFAULT keeps the 32-bit type of the base;
+//   - FLOAT_16 and FLOAT_2_8 both map a float base to F16;
+//   - SINT_16 / UINT_16 map either integer base to I16 / U16 (the min-precision,
+//     not the base, decides the signedness).
+// Any other combination asserts and yields a default-constructed CompType.
+CompType GetCompTypeWithMinPrec(D3D_REGISTER_COMPONENT_TYPE BaseCompTy,
+                                D3D11_SB_OPERAND_MIN_PRECISION MinPrec) {
+  switch (BaseCompTy) {
+  case D3D_REGISTER_COMPONENT_FLOAT32:
+    switch (MinPrec) {
+    case D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT:   return CompType::getF32();
+    case D3D11_SB_OPERAND_MIN_PRECISION_FLOAT_16:  __fallthrough;
+    case D3D11_SB_OPERAND_MIN_PRECISION_FLOAT_2_8: return CompType::getF16();
+    }
+    break;
+  case D3D_REGISTER_COMPONENT_SINT32:
+    switch (MinPrec) {
+    case D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT:   return CompType::getI32();
+    case D3D11_SB_OPERAND_MIN_PRECISION_SINT_16:   return CompType::getI16();
+    case D3D11_SB_OPERAND_MIN_PRECISION_UINT_16:   return CompType::getU16();
+    }
+    break;
+  case D3D_REGISTER_COMPONENT_UINT32:
+    switch (MinPrec) {
+    case D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT:   return CompType::getU32();
+    case D3D11_SB_OPERAND_MIN_PRECISION_SINT_16:   return CompType::getI16();
+    case D3D11_SB_OPERAND_MIN_PRECISION_UINT_16:   return CompType::getU16();
+    }
+    break;
+  }
+  DXASSERT(false, "otherwise incorrect combination of type and min-precision");
+  return CompType();
+}
+
+// CompType-based overload of the min-precision combination:
+//   - F32 / I32 / U32 bases follow the same table as the register-type overload;
+//   - an F64 base is returned as F64 regardless of MinPrec.
+// Any other combination asserts and yields a default-constructed CompType.
+CompType GetCompTypeWithMinPrec(CompType BaseCompTy,
+                                D3D11_SB_OPERAND_MIN_PRECISION MinPrec) {
+  switch (BaseCompTy.GetKind()) {
+  case CompType::Kind::F32:
+    switch (MinPrec) {
+    case D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT:   return CompType::getF32();
+    case D3D11_SB_OPERAND_MIN_PRECISION_FLOAT_16:  __fallthrough;
+    case D3D11_SB_OPERAND_MIN_PRECISION_FLOAT_2_8: return CompType::getF16();
+    }
+    break;
+  case CompType::Kind::I32:
+    switch (MinPrec) {
+    case D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT:   return CompType::getI32();
+    case D3D11_SB_OPERAND_MIN_PRECISION_SINT_16:   return CompType::getI16();
+    case D3D11_SB_OPERAND_MIN_PRECISION_UINT_16:   return CompType::getU16();
+    }
+    break;
+  case CompType::Kind::U32:
+    switch (MinPrec) {
+    case D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT:   return CompType::getU32();
+    case D3D11_SB_OPERAND_MIN_PRECISION_SINT_16:   return CompType::getI16();
+    case D3D11_SB_OPERAND_MIN_PRECISION_UINT_16:   return CompType::getU16();
+    }
+    break;
+  case CompType::Kind::F64:
+    return CompType::getF64();
+  }
+  DXASSERT(false, "otherwise incorrect combination of type and min-precision");
+  return CompType();
+}
+
+// Promotes a 16-bit component type (F16 / I16 / U16) to its 32-bit counterpart;
+// every other type is returned unchanged.
+static CompType GetFullPrecCompType(CompType CompTy) {
+  const auto Kind = CompTy.GetKind();
+  if (Kind == CompType::Kind::F16)
+    return CompType::getF32();
+  if (Kind == CompType::Kind::I16)
+    return CompType::getI32();
+  if (Kind == CompType::Kind::U16)
+    return CompType::getU32();
+  return CompTy;
+}
+
+// Picks a component type from the min-precision alone:
+//   - DEFAULT yields the full-precision form of DefaultPrecCompType;
+//   - FLOAT_16 / FLOAT_2_8 yield F16, SINT_16 yields I16, UINT_16 yields U16.
+// An unrecognized min-precision triggers DXASSERT_DXBC and falls back to the
+// full-precision form of DefaultPrecCompType.
+CompType GetCompTypeFromMinPrec(D3D11_SB_OPERAND_MIN_PRECISION MinPrec,
+                                CompType DefaultPrecCompType) {
+  switch (MinPrec) {
+  case D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT:    return GetFullPrecCompType(DefaultPrecCompType);
+  case D3D11_SB_OPERAND_MIN_PRECISION_FLOAT_16:   __fallthrough;
+  case D3D11_SB_OPERAND_MIN_PRECISION_FLOAT_2_8:  return CompType::getF16();
+  case D3D11_SB_OPERAND_MIN_PRECISION_SINT_16:    return CompType::getI16();
+  case D3D11_SB_OPERAND_MIN_PRECISION_UINT_16:    return CompType::getU16();
+  default: DXASSERT_DXBC(false);                  return GetFullPrecCompType(DefaultPrecCompType);
+  }
+}
+
+// Maps a DXBC resource return type to a component type.
+// UNORM and SNORM collapse to plain F32 here (GetDeclResCompType keeps them distinct).
+// MIXED, CONTINUED and UNUSED yield the invalid type; any other value asserts and
+// also yields the invalid type.
+CompType GetResCompType(D3D10_SB_RESOURCE_RETURN_TYPE CompTy) {
+  switch(CompTy) {
+  case D3D10_SB_RETURN_TYPE_UNORM:                return CompType::getF32();
+  case D3D10_SB_RETURN_TYPE_SNORM:                return CompType::getF32();
+  case D3D10_SB_RETURN_TYPE_SINT:                 return CompType::getI32();
+  case D3D10_SB_RETURN_TYPE_UINT:                 return CompType::getU32();
+  case D3D10_SB_RETURN_TYPE_FLOAT:                return CompType::getF32();
+  case D3D10_SB_RETURN_TYPE_MIXED:                return CompType::getInvalid();
+  case D3D11_SB_RETURN_TYPE_DOUBLE:               return CompType::getF64();
+  case D3D11_SB_RETURN_TYPE_CONTINUED:            return CompType::getInvalid();
+  case D3D11_SB_RETURN_TYPE_UNUSED:               return CompType::getInvalid();
+  default: DXASSERT(false, "invalid comp type");  return CompType::getInvalid();
+  }
+}
+
+// Maps a DXBC resource return type to a component type, keeping UNORM and SNORM
+// as the dedicated UNormF32 / SNormF32 types (GetResCompType collapses them to F32).
+// MIXED, CONTINUED and UNUSED yield the invalid type; any other value asserts and
+// also yields the invalid type.
+CompType GetDeclResCompType(D3D10_SB_RESOURCE_RETURN_TYPE CompTy) {
+  switch(CompTy) {
+  case D3D10_SB_RETURN_TYPE_UNORM:                return CompType::getUNormF32();
+  case D3D10_SB_RETURN_TYPE_SNORM:                return CompType::getSNormF32();
+  case D3D10_SB_RETURN_TYPE_SINT:                 return CompType::getI32();
+  case D3D10_SB_RETURN_TYPE_UINT:                 return CompType::getU32();
+  case D3D10_SB_RETURN_TYPE_FLOAT:                return CompType::getF32();
+  case D3D10_SB_RETURN_TYPE_MIXED:                return CompType::getInvalid();
+  case D3D11_SB_RETURN_TYPE_DOUBLE:               return CompType::getF64();
+  case D3D11_SB_RETURN_TYPE_CONTINUED:            return CompType::getInvalid();
+  case D3D11_SB_RETURN_TYPE_UNUSED:               return CompType::getInvalid();
+  default: DXASSERT(false, "invalid comp type");  return CompType::getInvalid();
+  }
+}
+
+static const char s_ComponentName[kWidth] = { 'x', 'y', 'z', 'w' };  // Component letters, indexed by component number.
+char GetCompName(BYTE c) {  // Returns the letter ('x', 'y', 'z' or 'w') for component index c.
+  DXASSERT(c < kWidth, "otherwise the caller did not pass the right component value");
+  return s_ComponentName[c];  // c is range-checked only by the assert above.
+}
+
+// Converts a DXBC sampler mode into the DXIL sampler kind. A mode outside the
+// three known ones asserts and produces SamplerKind::Invalid.
+DxilSampler::SamplerKind GetSamplerKind(D3D10_SB_SAMPLER_MODE Mode) {
+  typedef DxilSampler::SamplerKind Kind;
+  Kind Result = Kind::Invalid;
+
+  switch (Mode) {
+  case D3D10_SB_SAMPLER_MODE_DEFAULT:
+    Result = Kind::Default;
+    break;
+  case D3D10_SB_SAMPLER_MODE_COMPARISON:
+    Result = Kind::Comparison;
+    break;
+  case D3D10_SB_SAMPLER_MODE_MONO:
+    Result = Kind::Mono;
+    break;
+  default:
+    DXASSERT(false, "otherwise the caller did not pass the right Mode");
+    break;
+  }
+
+  return Result;
+}
+
+// Flattens a (register, component) pair into one scalar index, counting four
+// components per register.
+unsigned GetRegIndex(unsigned Reg, unsigned Comp) {
+  const unsigned kCompsPerReg = 4;
+  return kCompsPerReg * Reg + Comp;
+}
+
+// Maps a DXBC atomic opcode to the DXIL atomic binary operation. The plain
+// ATOMIC_* and the IMM_ATOMIC_* flavor of an operation map to the same code.
+// Compare-store/compare-exchange and any other opcode assert and give
+// AtomicBinOpCode::Invalid.
+DXIL::AtomicBinOpCode GetAtomicBinOp(D3D10_SB_OPCODE_TYPE DxbcOpCode) {
+  typedef DXIL::AtomicBinOpCode Op;
+
+  switch (DxbcOpCode) {
+  case D3D11_SB_OPCODE_ATOMIC_IADD:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_IADD:
+    return Op::Add;
+  case D3D11_SB_OPCODE_ATOMIC_AND:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_AND:
+    return Op::And;
+  case D3D11_SB_OPCODE_ATOMIC_OR:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_OR:
+    return Op::Or;
+  case D3D11_SB_OPCODE_ATOMIC_XOR:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_XOR:
+    return Op::Xor;
+  case D3D11_SB_OPCODE_ATOMIC_IMAX:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_IMAX:
+    return Op::IMax;
+  case D3D11_SB_OPCODE_ATOMIC_IMIN:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_IMIN:
+    return Op::IMin;
+  case D3D11_SB_OPCODE_ATOMIC_UMAX:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_UMAX:
+    return Op::UMax;
+  case D3D11_SB_OPCODE_ATOMIC_UMIN:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_UMIN:
+    return Op::UMin;
+  case D3D11_SB_OPCODE_IMM_ATOMIC_EXCH:
+    return Op::Exchange;
+
+  // Compare-and-swap opcodes are not binary operations.
+  case D3D11_SB_OPCODE_ATOMIC_CMP_STORE:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_CMP_EXCH:
+  default:
+    DXASSERT(false, "otherwise the caller did not pass the right OpCode");
+    break;
+  }
+
+  return Op::Invalid;
+}
+
+// Maps a DXBC atomic opcode to the LLVM atomicrmw operation. The plain
+// ATOMIC_* and the IMM_ATOMIC_* flavor of an operation map to the same value.
+// Compare-store/compare-exchange and any other opcode assert and give
+// BAD_BINOP.
+llvm::AtomicRMWInst::BinOp GetLlvmAtomicBinOp(D3D10_SB_OPCODE_TYPE DxbcOpCode) {
+  typedef llvm::AtomicRMWInst RMW;
+
+  switch (DxbcOpCode) {
+  case D3D11_SB_OPCODE_ATOMIC_IADD:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_IADD:
+    return RMW::Add;
+  case D3D11_SB_OPCODE_ATOMIC_AND:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_AND:
+    return RMW::And;
+  case D3D11_SB_OPCODE_ATOMIC_OR:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_OR:
+    return RMW::Or;
+  case D3D11_SB_OPCODE_ATOMIC_XOR:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_XOR:
+    return RMW::Xor;
+  case D3D11_SB_OPCODE_ATOMIC_IMAX:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_IMAX:
+    return RMW::Max;
+  case D3D11_SB_OPCODE_ATOMIC_IMIN:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_IMIN:
+    return RMW::Min;
+  case D3D11_SB_OPCODE_ATOMIC_UMAX:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_UMAX:
+    return RMW::UMax;
+  case D3D11_SB_OPCODE_ATOMIC_UMIN:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_UMIN:
+    return RMW::UMin;
+  case D3D11_SB_OPCODE_IMM_ATOMIC_EXCH:
+    return RMW::Xchg;
+
+  // Compare-and-swap opcodes have no atomicrmw equivalent.
+  case D3D11_SB_OPCODE_ATOMIC_CMP_STORE:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_CMP_EXCH:
+  default:
+    DXASSERT(false, "otherwise the caller did not pass the right OpCode");
+    break;
+  }
+
+  return RMW::BAD_BINOP;
+}
+
+// Reports whether a DXBC atomic opcode produces a result: true for the
+// IMM_ATOMIC_* opcodes, false for the plain ATOMIC_* ones. Any other opcode
+// asserts and reports false.
+bool AtomicBinOpHasReturn(D3D10_SB_OPCODE_TYPE DxbcOpCode) {
+  bool bHasReturn = false;
+
+  switch (DxbcOpCode) {
+  case D3D11_SB_OPCODE_IMM_ATOMIC_IADD:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_AND:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_OR:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_XOR:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_EXCH:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_IMAX:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_IMIN:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_UMAX:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_UMIN:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_CMP_EXCH:
+    bHasReturn = true;
+    break;
+
+  case D3D11_SB_OPCODE_ATOMIC_AND:
+  case D3D11_SB_OPCODE_ATOMIC_OR:
+  case D3D11_SB_OPCODE_ATOMIC_XOR:
+  case D3D11_SB_OPCODE_ATOMIC_IADD:
+  case D3D11_SB_OPCODE_ATOMIC_IMAX:
+  case D3D11_SB_OPCODE_ATOMIC_IMIN:
+  case D3D11_SB_OPCODE_ATOMIC_UMAX:
+  case D3D11_SB_OPCODE_ATOMIC_UMIN:
+  case D3D11_SB_OPCODE_ATOMIC_CMP_STORE:
+    break;
+
+  default:
+    DXASSERT(false, "otherwise the caller did not pass the right OpCode");
+    break;
+  }
+
+  return bHasReturn;
+}
+
+// Reports whether a DXBC atomic opcode is one of the two compare-and-swap
+// forms (ATOMIC_CMP_STORE, IMM_ATOMIC_CMP_EXCH). All other atomic opcodes
+// report false; a non-atomic opcode asserts and reports false.
+bool IsCompareExchAtomicBinOp(D3D10_SB_OPCODE_TYPE DxbcOpCode) {
+  bool bIsCmpExch = false;
+
+  switch (DxbcOpCode) {
+  case D3D11_SB_OPCODE_ATOMIC_CMP_STORE:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_CMP_EXCH:
+    bIsCmpExch = true;
+    break;
+
+  case D3D11_SB_OPCODE_ATOMIC_AND:
+  case D3D11_SB_OPCODE_ATOMIC_OR:
+  case D3D11_SB_OPCODE_ATOMIC_XOR:
+  case D3D11_SB_OPCODE_ATOMIC_IADD:
+  case D3D11_SB_OPCODE_ATOMIC_IMAX:
+  case D3D11_SB_OPCODE_ATOMIC_IMIN:
+  case D3D11_SB_OPCODE_ATOMIC_UMAX:
+  case D3D11_SB_OPCODE_ATOMIC_UMIN:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_IADD:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_AND:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_OR:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_XOR:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_EXCH:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_IMAX:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_IMIN:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_UMAX:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_UMIN:
+    break;
+
+  default:
+    DXASSERT(false, "otherwise the caller did not pass the right OpCode");
+    break;
+  }
+
+  return bIsCmpExch;
+}
+
+// Returns true for the D3DWDDM1_3 *_FEEDBACK opcodes listed below and false
+// for every other opcode.
+bool HasFeedback(D3D10_SB_OPCODE_TYPE OpCode) {
+  switch (OpCode) {
+  // sample*
+  case D3DWDDM1_3_SB_OPCODE_SAMPLE_L_FEEDBACK:
+  case D3DWDDM1_3_SB_OPCODE_SAMPLE_C_LZ_FEEDBACK:
+  case D3DWDDM1_3_SB_OPCODE_SAMPLE_CLAMP_FEEDBACK:
+  case D3DWDDM1_3_SB_OPCODE_SAMPLE_B_CLAMP_FEEDBACK:
+  case D3DWDDM1_3_SB_OPCODE_SAMPLE_D_CLAMP_FEEDBACK:
+  case D3DWDDM1_3_SB_OPCODE_SAMPLE_C_CLAMP_FEEDBACK:
+  // ld*
+  case D3DWDDM1_3_SB_OPCODE_LD_FEEDBACK:
+  case D3DWDDM1_3_SB_OPCODE_LD_MS_FEEDBACK:
+  case D3DWDDM1_3_SB_OPCODE_LD_UAV_TYPED_FEEDBACK:
+  case D3DWDDM1_3_SB_OPCODE_LD_RAW_FEEDBACK:
+  case D3DWDDM1_3_SB_OPCODE_LD_STRUCTURED_FEEDBACK:
+  // gather4*
+  case D3DWDDM1_3_SB_OPCODE_GATHER4_FEEDBACK:
+  case D3DWDDM1_3_SB_OPCODE_GATHER4_C_FEEDBACK:
+  case D3DWDDM1_3_SB_OPCODE_GATHER4_PO_FEEDBACK:
+  case D3DWDDM1_3_SB_OPCODE_GATHER4_PO_C_FEEDBACK:
+    return true;
+
+  default:
+    return false;
+  }
+}
+
+// Returns the slot number (0..4) associated with the given opcode, per the
+// table below. An opcode that is not in the table asserts and yields 0.
+// NOTE(review): presumably the slot is the index of the resource operand in
+// the instruction -- confirm against the callers.
+unsigned GetResourceSlot(D3D10_SB_OPCODE_TYPE OpCode) {
+  unsigned Slot = 0;
+
+  switch (OpCode) {
+  // Slot 0: stores and non-returning atomics.
+  case D3D11_SB_OPCODE_STORE_UAV_TYPED:
+  case D3D11_SB_OPCODE_STORE_RAW:
+  case D3D11_SB_OPCODE_STORE_STRUCTURED:
+  case D3D11_SB_OPCODE_ATOMIC_AND:
+  case D3D11_SB_OPCODE_ATOMIC_OR:
+  case D3D11_SB_OPCODE_ATOMIC_XOR:
+  case D3D11_SB_OPCODE_ATOMIC_IADD:
+  case D3D11_SB_OPCODE_ATOMIC_IMAX:
+  case D3D11_SB_OPCODE_ATOMIC_IMIN:
+  case D3D11_SB_OPCODE_ATOMIC_UMAX:
+  case D3D11_SB_OPCODE_ATOMIC_UMIN:
+  case D3D11_SB_OPCODE_ATOMIC_CMP_STORE:
+    Slot = 0;
+    break;
+
+  // Slot 1: returning atomics and sample info/position queries.
+  case D3D11_SB_OPCODE_IMM_ATOMIC_IADD:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_AND:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_OR:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_XOR:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_EXCH:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_IMAX:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_IMIN:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_UMAX:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_UMIN:
+  case D3D11_SB_OPCODE_IMM_ATOMIC_CMP_EXCH:
+  case D3D10_1_SB_OPCODE_SAMPLE_INFO:
+  case D3D10_1_SB_OPCODE_SAMPLE_POS:
+    Slot = 1;
+    break;
+
+  // Slot 2: plain sample/load/gather and resource queries.
+  case D3D10_SB_OPCODE_SAMPLE:
+  case D3D10_SB_OPCODE_SAMPLE_B:
+  case D3D10_SB_OPCODE_SAMPLE_L:
+  case D3D10_SB_OPCODE_SAMPLE_D:
+  case D3D10_SB_OPCODE_SAMPLE_C:
+  case D3D10_SB_OPCODE_SAMPLE_C_LZ:
+  case D3D10_SB_OPCODE_LD:
+  case D3D10_SB_OPCODE_LD_MS:
+  case D3D11_SB_OPCODE_LD_UAV_TYPED:
+  case D3D11_SB_OPCODE_LD_RAW:
+  case D3D10_SB_OPCODE_RESINFO:
+  case D3D10_1_SB_OPCODE_LOD:
+  case D3D10_1_SB_OPCODE_GATHER4:
+  case D3D11_SB_OPCODE_GATHER4_C:
+    Slot = 2;
+    break;
+
+  // Slot 3: feedback variants of the above, plus ld_structured and
+  // gather4_po(_c).
+  case D3DWDDM1_3_SB_OPCODE_SAMPLE_CLAMP_FEEDBACK:
+  case D3DWDDM1_3_SB_OPCODE_SAMPLE_B_CLAMP_FEEDBACK:
+  case D3DWDDM1_3_SB_OPCODE_SAMPLE_L_FEEDBACK:
+  case D3DWDDM1_3_SB_OPCODE_SAMPLE_D_CLAMP_FEEDBACK:
+  case D3DWDDM1_3_SB_OPCODE_SAMPLE_C_CLAMP_FEEDBACK:
+  case D3DWDDM1_3_SB_OPCODE_SAMPLE_C_LZ_FEEDBACK:
+  case D3DWDDM1_3_SB_OPCODE_LD_FEEDBACK:
+  case D3DWDDM1_3_SB_OPCODE_LD_MS_FEEDBACK:
+  case D3DWDDM1_3_SB_OPCODE_LD_UAV_TYPED_FEEDBACK:
+  case D3DWDDM1_3_SB_OPCODE_LD_RAW_FEEDBACK:
+  case D3D11_SB_OPCODE_LD_STRUCTURED:
+  case D3DWDDM1_3_SB_OPCODE_GATHER4_FEEDBACK:
+  case D3DWDDM1_3_SB_OPCODE_GATHER4_C_FEEDBACK:
+  case D3D11_SB_OPCODE_GATHER4_PO:
+  case D3D11_SB_OPCODE_GATHER4_PO_C:
+    Slot = 3;
+    break;
+
+  // Slot 4: feedback variants of ld_structured and gather4_po(_c).
+  case D3DWDDM1_3_SB_OPCODE_LD_STRUCTURED_FEEDBACK:
+  case D3DWDDM1_3_SB_OPCODE_GATHER4_PO_FEEDBACK:
+  case D3DWDDM1_3_SB_OPCODE_GATHER4_PO_C_FEEDBACK:
+    Slot = 4;
+    break;
+
+  default:
+    DXASSERT_NOMSG(false);
+    break;
+  }
+
+  return Slot;
+}
+
+// Builds a DXIL barrier mode by OR-ing together the BarrierMode bit of every
+// flag that is set. With all flags false the result is the zero value.
+DXIL::BarrierMode GetBarrierMode(bool bSyncThreadGroup,
+                                 bool bUAVFenceGlobal,
+                                 bool bUAVFenceThreadGroup,
+                                 bool bTGSMFence) {
+  typedef DXIL::BarrierMode BM;
+
+  const unsigned Bits =
+      (bSyncThreadGroup     ? (unsigned)BM::SyncThreadGroup     : 0u) |
+      (bUAVFenceGlobal      ? (unsigned)BM::UAVFenceGlobal      : 0u) |
+      (bUAVFenceThreadGroup ? (unsigned)BM::UAVFenceThreadGroup : 0u) |
+      (bTGSMFence           ? (unsigned)BM::TGSMFence           : 0u);
+
+  return (BM)Bits;
+}
+
+DXIL::InputPrimitive GetInputPrimitive(D3D10_SB_PRIMITIVE Primitive) {
+  switch (Primitive) {
+  case D3D10_SB_PRIMITIVE_UNDEFINED:                return DXIL::InputPrimitive::Undefined;
+  case D3D10_SB_PRIMITIVE_POINT:                    return DXIL::InputPrimitive::Point;
+  case D3D10_SB_PRIMITIVE_LINE:                     return DXIL::InputPrimitive::Line;
+  case D3D10_SB_PRIMITIVE_TRIANGLE:                 return DXIL::InputPrimitive::Triangle;
+  case D3D10_SB_PRIMITIVE_LINE_ADJ:                 return DXIL::InputPrimitive::LineWithAdjacency;
+  case D3D10_SB_PRIMITIVE_TRIANGLE_ADJ:             return DXIL::InputPrimitive::TriangleWithAdjacency;
+  case D3D11_SB_PRIMITIVE_1_CONTROL_POINT_PATCH:    return DXIL::InputPrimitive::ControlPointPatch1;
+  case D3D11_SB_PRIMITIVE_2_CONTROL_POINT_PATCH:    return DXIL::InputPrimitive::ControlPointPatch2;
+  case D3D11_SB_PRIMITIVE_3_CONTROL_POINT_PATCH:    return DXIL::InputPrimitive::ControlPointPatch3;
+  case D3D11_SB_PRIMITIVE_4_CONTROL_POINT_PATCH:    return DXIL::InputPrimitive::ControlPointPatch4;
+  case D3D11_SB_PRIMITIVE_5_CONTROL_POINT_PATCH:    return DXIL::InputPrimitive::ControlPointPatch5;
+  case D3D11_SB_PRIMITIVE_6_CONTROL_POINT_PATCH:    return DXIL::InputPrimitive::ControlPointPatch6;
+  case D3D11_SB_PRIMITIVE_7_CONTROL_POINT_PATCH:    return DXIL::InputPrimitive::ControlPointPatch7;
+  case D3D11_SB_PRIMITIVE_8_CONTROL_POINT_PATCH:    return DXIL::InputPrimitive::ControlPointPatch8;
+  case D3D11_SB_PRIMITIVE_9_CONTROL_POINT_PATCH:    return DXIL::InputPrimitive::ControlPointPatch9;
+  case D3D11_SB_PRIMITIVE_10_CONTROL_POINT_PATCH:   return DXIL::InputPrimitive::ControlPointPatch10;
+  case D3D11_SB_PRIMITIVE_11_CONTROL_POINT_PATCH:   return DXIL::InputPrimitive::ControlPointPatch11;
+  case D3D11_SB_PRIMITIVE_12_CONTROL_POINT_PATCH:   return DXIL::InputPrimitive::ControlPointPatch12;
+  case D3D11_SB_PRIMITIVE_13_CONTROL_POINT_PATCH:   return DXIL::InputPrimitive::ControlPointPatch13;
+  case D3D11_SB_PRIMITIVE_14_CONTROL_POINT_PATCH:   return DXIL::InputPrimitive::ControlPointPatch14;
+  case D3D11_SB_PRIMITIVE_15_CONTROL_POINT_PATCH:   return DXIL::InputPrimitive::ControlPointPatch15;
+  case D3D11_SB_PRIMITIVE_16_CONTROL_POINT_PATCH:   return DXIL::InputPrimitive::ControlPointPatch16;
+  case D3D11_SB_PRIMITIVE_17_CONTROL_POINT_PATCH:   return DXIL::InputPrimitive::ControlPointPatch17;
+  case D3D11_SB_PRIMITIVE_18_CONTROL_POINT_PATCH:   return DXIL::InputPrimitive::ControlPointPatch18;
+  case D3D11_SB_PRIMITIVE_19_CONTROL_POINT_PATCH:   return DXIL::InputPrimitive::ControlPointPatch19;
+  case D3D11_SB_PRIMITIVE_20_CONTROL_POINT_PATCH:   return DXIL::InputPrimitive::ControlPointPatch20;
+  case D3D11_SB_PRIMITIVE_21_CONTROL_POINT_PATCH:   return DXIL::InputPrimitive::ControlPointPatch21;
+  case D3D11_SB_PRIMITIVE_22_CONTROL_POINT_PATCH:   return DXIL::InputPrimitive::ControlPointPatch22;
+  case D3D11_SB_PRIMITIVE_23_CONTROL_POINT_PATCH:   return DXIL::InputPrimitive::ControlPointPatch23;
+  case D3D11_SB_PRIMITIVE_24_CONTROL_POINT_PATCH:   return DXIL::InputPrimitive::ControlPointPatch24;
+  case D3D11_SB_PRIMITIVE_25_CONTROL_POINT_PATCH:   return DXIL::InputPrimitive::ControlPointPatch25;
+  case D3D11_SB_PRIMITIVE_26_CONTROL_POINT_PATCH:   return DXIL::InputPrimitive::ControlPointPatch26;
+  case D3D11_SB_PRIMITIVE_27_CONTROL_POINT_PATCH:   return DXIL::InputPrimitive::ControlPointPatch27;
+  case D3D11_SB_PRIMITIVE_28_CONTROL_POINT_PATCH:   return DXIL::InputPrimitive::ControlPointPatch28;
+  case D3D11_SB_PRIMITIVE_29_CONTROL_POINT_PATCH:   return DXIL::InputPrimitive::ControlPointPatch29;
+  case D3D11_SB_PRIMITIVE_30_CONTROL_POINT_PATCH:   return DXIL::InputPrimitive::ControlPointPatch30;
+  case D3D11_SB_PRIMITIVE_31_CONTROL_POINT_PATCH:   return DXIL::InputPrimitive::ControlPointPatch31;
+  case D3D11_SB_PRIMITIVE_32_CONTROL_POINT_PATCH:   return DXIL::InputPrimitive::ControlPointPatch32;
+  }
+
+  DXASSERT_NOMSG(false);
+  return DXIL::InputPrimitive::Undefined;
+}
+
+// Converts a DXBC primitive topology to the DXIL topology of the same name.
+// The *_ADJ topologies and any unknown value are not converted; they reach
+// the IFTBOOL(false, DXC_E_INCORRECT_DXBC) failure path below.
+DXIL::PrimitiveTopology GetPrimitiveTopology(D3D10_SB_PRIMITIVE_TOPOLOGY Topology) {
+  typedef DXIL::PrimitiveTopology PT;
+
+  switch (Topology) {
+  case D3D10_SB_PRIMITIVE_TOPOLOGY_UNDEFINED:
+    return PT::Undefined;
+  case D3D10_SB_PRIMITIVE_TOPOLOGY_POINTLIST:
+    return PT::PointList;
+  case D3D10_SB_PRIMITIVE_TOPOLOGY_LINELIST:
+    return PT::LineList;
+  case D3D10_SB_PRIMITIVE_TOPOLOGY_LINESTRIP:
+    return PT::LineStrip;
+  case D3D10_SB_PRIMITIVE_TOPOLOGY_TRIANGLELIST:
+    return PT::TriangleList;
+  case D3D10_SB_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP:
+    return PT::TriangleStrip;
+
+  // The ADJ versions are redundant in DXBC and are not used, probably put
+  // there by mistake.
+  case D3D10_SB_PRIMITIVE_TOPOLOGY_LINELIST_ADJ:
+  case D3D10_SB_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ:
+  case D3D10_SB_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ:
+  case D3D10_SB_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ:
+    break;
+  }
+
+  IFTBOOL(false, DXC_E_INCORRECT_DXBC);
+  return PT::Undefined;
+}
+
+// Returns the HLSL system-value semantic name for a DXBC system name. All
+// edge/detail/density tessellation factors share "SV_TessFactor" and all
+// inside factors share "SV_InsideTessFactor". A name that is not listed goes
+// through IFT(DXC_E_INCORRECT_DXBC); "unknown" is the value returned after it.
+const char *GetD3D10SBName(D3D10_SB_NAME D3DName) {
+  switch (D3DName) {
+  case D3D10_SB_NAME_UNDEFINED:
+    return "undefined";
+  case D3D10_SB_NAME_POSITION:
+    return "SV_Position";
+  case D3D10_SB_NAME_CLIP_DISTANCE:
+    return "SV_ClipDistance";
+  case D3D10_SB_NAME_CULL_DISTANCE:
+    return "SV_CullDistance";
+  case D3D10_SB_NAME_RENDER_TARGET_ARRAY_INDEX:
+    return "SV_RenderTargetArrayIndex";
+  case D3D10_SB_NAME_VIEWPORT_ARRAY_INDEX:
+    return "SV_ViewportArrayIndex";
+  case D3D10_SB_NAME_VERTEX_ID:
+    return "SV_VertexID";
+  case D3D10_SB_NAME_PRIMITIVE_ID:
+    return "SV_PrimitiveID";
+  case D3D10_SB_NAME_INSTANCE_ID:
+    return "SV_InstanceID";
+  case D3D10_SB_NAME_IS_FRONT_FACE:
+    return "SV_IsFrontFace";
+  case D3D10_SB_NAME_SAMPLE_INDEX:
+    return "SV_SampleIndex";
+
+  case D3D11_SB_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_LINE_DETAIL_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_LINE_DENSITY_TESSFACTOR:
+    return "SV_TessFactor";
+
+  case D3D11_SB_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_TRI_INSIDE_TESSFACTOR:
+    return "SV_InsideTessFactor";
+
+  default:
+    break;
+  }
+
+  IFT(DXC_E_INCORRECT_DXBC);
+  return "unknown";
+}
+
+// Returns the semantic index that goes with a DXBC system name. Only the
+// tessellation factors use a non-zero index (their position within
+// SV_TessFactor / SV_InsideTessFactor); everything else, including names that
+// are not listed, gets index 0.
+unsigned GetD3D10SBSemanticIndex(D3D10_SB_NAME D3DName) {
+  switch (D3DName) {
+  case D3D11_SB_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_LINE_DENSITY_TESSFACTOR:
+    return 1;
+
+  case D3D11_SB_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR:
+    return 2;
+
+  case D3D11_SB_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR:
+    return 3;
+
+  case D3D10_SB_NAME_UNDEFINED:
+  case D3D10_SB_NAME_POSITION:
+  case D3D10_SB_NAME_CLIP_DISTANCE:
+  case D3D10_SB_NAME_CULL_DISTANCE:
+  case D3D10_SB_NAME_RENDER_TARGET_ARRAY_INDEX:
+  case D3D10_SB_NAME_VIEWPORT_ARRAY_INDEX:
+  case D3D10_SB_NAME_VERTEX_ID:
+  case D3D10_SB_NAME_PRIMITIVE_ID:
+  case D3D10_SB_NAME_INSTANCE_ID:
+  case D3D10_SB_NAME_IS_FRONT_FACE:
+  case D3D10_SB_NAME_SAMPLE_INDEX:
+  case D3D11_SB_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_TRI_INSIDE_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_LINE_DETAIL_TESSFACTOR:
+  default:
+    return 0;
+  }
+}
+
+// Returns the register component type of a DXBC system name: FLOAT32 for
+// position and clip/cull distance, UINT32 for the index/ID style values.
+// The undefined name, the tessellation factors and any unlisted name go
+// through IFT(DXC_E_INCORRECT_DXBC); UNKNOWN is the value returned after it.
+D3D_REGISTER_COMPONENT_TYPE GetD3DRegCompType(D3D10_SB_NAME D3DName) {
+  switch (D3DName) {
+  case D3D10_SB_NAME_POSITION:
+  case D3D10_SB_NAME_CLIP_DISTANCE:
+  case D3D10_SB_NAME_CULL_DISTANCE:
+    return D3D_REGISTER_COMPONENT_FLOAT32;
+
+  case D3D10_SB_NAME_RENDER_TARGET_ARRAY_INDEX:
+  case D3D10_SB_NAME_VIEWPORT_ARRAY_INDEX:
+  case D3D10_SB_NAME_VERTEX_ID:
+  case D3D10_SB_NAME_PRIMITIVE_ID:
+  case D3D10_SB_NAME_INSTANCE_ID:
+  case D3D10_SB_NAME_IS_FRONT_FACE:
+  case D3D10_SB_NAME_SAMPLE_INDEX:
+    return D3D_REGISTER_COMPONENT_UINT32;
+
+  // This should not be called for an undefined name.
+  case D3D10_SB_NAME_UNDEFINED:
+  case D3D11_SB_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_TRI_INSIDE_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_LINE_DETAIL_TESSFACTOR:
+  case D3D11_SB_NAME_FINAL_LINE_DENSITY_TESSFACTOR:
+  default:
+    break;
+  }
+
+  IFT(DXC_E_INCORRECT_DXBC);
+  return D3D_REGISTER_COMPONENT_UNKNOWN;
+}
+
+// Returns the HLSL system-value semantic name for a D3D_NAME value. A value
+// that is not listed asserts and is reported as "undefined".
+const char *GetSemanticNameFromD3DName(D3D_NAME D3DName) {
+  switch (D3DName) {
+  case D3D_NAME_UNDEFINED:
+    return "undefined";
+  case D3D_NAME_POSITION:
+    return "SV_Position";
+  case D3D_NAME_CLIP_DISTANCE:
+    return "SV_ClipDistance";
+  case D3D_NAME_CULL_DISTANCE:
+    return "SV_CullDistance";
+  case D3D_NAME_RENDER_TARGET_ARRAY_INDEX:
+    return "SV_RenderTargetArrayIndex";
+  case D3D_NAME_VIEWPORT_ARRAY_INDEX:
+    return "SV_ViewportArrayIndex";
+  case D3D_NAME_VERTEX_ID:
+    return "SV_VertexID";
+  case D3D_NAME_PRIMITIVE_ID:
+    return "SV_PrimitiveID";
+  case D3D_NAME_INSTANCE_ID:
+    return "SV_InstanceID";
+  case D3D_NAME_IS_FRONT_FACE:
+    return "SV_IsFrontFace";
+  case D3D_NAME_SAMPLE_INDEX:
+    return "SV_SampleIndex";
+
+  // Edge, detail and density factors all use the same semantic name.
+  case D3D_NAME_FINAL_QUAD_EDGE_TESSFACTOR:
+  case D3D_NAME_FINAL_TRI_EDGE_TESSFACTOR:
+  case D3D_NAME_FINAL_LINE_DETAIL_TESSFACTOR:
+  case D3D_NAME_FINAL_LINE_DENSITY_TESSFACTOR:
+    return "SV_TessFactor";
+
+  case D3D_NAME_FINAL_QUAD_INSIDE_TESSFACTOR:
+  case D3D_NAME_FINAL_TRI_INSIDE_TESSFACTOR:
+    return "SV_InsideTessFactor";
+
+  case D3D_NAME_TARGET:
+    return "SV_Target";
+  case D3D_NAME_DEPTH:
+    return "SV_Depth";
+  case D3D_NAME_COVERAGE:
+    return "SV_Coverage";
+  case D3D_NAME_DEPTH_GREATER_EQUAL:
+    return "SV_DepthGreaterEqual";
+  case D3D_NAME_DEPTH_LESS_EQUAL:
+    return "SV_DepthLessEqual";
+  case D3D_NAME_STENCIL_REF:
+    return "SV_StencilRef";
+  case D3D_NAME_INNER_COVERAGE:
+    return "SV_InnerCoverage";
+
+  default:
+    DXASSERT_NOMSG(false);
+    return "undefined";
+  }
+}
+
+// Returns the semantic index fixed by a D3D_NAME value. Only the two line
+// tessellation factors carry one (detail = 0, density = 1); every other
+// listed name returns UINT_MAX. A value that is not listed asserts and also
+// returns UINT_MAX.
+unsigned GetSemanticIndexFromD3DName(D3D_NAME D3DName) {
+  switch (D3DName) {
+  case D3D_NAME_FINAL_LINE_DETAIL_TESSFACTOR:
+    return 0;
+  case D3D_NAME_FINAL_LINE_DENSITY_TESSFACTOR:
+    return 1;
+
+  case D3D_NAME_UNDEFINED:
+  case D3D_NAME_POSITION:
+  case D3D_NAME_CLIP_DISTANCE:
+  case D3D_NAME_CULL_DISTANCE:
+  case D3D_NAME_RENDER_TARGET_ARRAY_INDEX:
+  case D3D_NAME_VIEWPORT_ARRAY_INDEX:
+  case D3D_NAME_VERTEX_ID:
+  case D3D_NAME_PRIMITIVE_ID:
+  case D3D_NAME_INSTANCE_ID:
+  case D3D_NAME_IS_FRONT_FACE:
+  case D3D_NAME_SAMPLE_INDEX:
+  case D3D_NAME_FINAL_QUAD_EDGE_TESSFACTOR:
+  case D3D_NAME_FINAL_QUAD_INSIDE_TESSFACTOR:
+  case D3D_NAME_FINAL_TRI_EDGE_TESSFACTOR:
+  case D3D_NAME_FINAL_TRI_INSIDE_TESSFACTOR:
+  case D3D_NAME_TARGET:
+  case D3D_NAME_DEPTH:
+  case D3D_NAME_COVERAGE:
+  case D3D_NAME_DEPTH_GREATER_EQUAL:
+  case D3D_NAME_DEPTH_LESS_EQUAL:
+  case D3D_NAME_STENCIL_REF:
+  case D3D_NAME_INNER_COVERAGE:
+    return UINT_MAX;
+
+  default:
+    DXASSERT_NOMSG(false);
+    return UINT_MAX;
+  }
+}
+
+// Converts a DXBC tessellator domain to the DXIL domain of the same name.
+// Any other value reaches the IFTBOOL(false, DXC_E_INCORRECT_DXBC) failure
+// path below.
+DXIL::TessellatorDomain GetTessellatorDomain(D3D11_SB_TESSELLATOR_DOMAIN TessDomain) {
+  typedef DXIL::TessellatorDomain Domain;
+
+  switch (TessDomain) {
+  case D3D11_SB_TESSELLATOR_DOMAIN_UNDEFINED:
+    return Domain::Undefined;
+  case D3D11_SB_TESSELLATOR_DOMAIN_ISOLINE:
+    return Domain::IsoLine;
+  case D3D11_SB_TESSELLATOR_DOMAIN_TRI:
+    return Domain::Tri;
+  case D3D11_SB_TESSELLATOR_DOMAIN_QUAD:
+    return Domain::Quad;
+  default:
+    break;
+  }
+
+  IFTBOOL(false, DXC_E_INCORRECT_DXBC);
+  return Domain::Undefined;
+}
+
+// Converts a DXBC tessellator partitioning to the DXIL partitioning of the
+// same name. Any other value reaches the
+// IFTBOOL(false, DXC_E_INCORRECT_DXBC) failure path below.
+DXIL::TessellatorPartitioning GetTessellatorPartitioning(D3D11_SB_TESSELLATOR_PARTITIONING TessPartitioning) {
+  typedef DXIL::TessellatorPartitioning Partitioning;
+
+  switch (TessPartitioning) {
+  case D3D11_SB_TESSELLATOR_PARTITIONING_UNDEFINED:
+    return Partitioning::Undefined;
+  case D3D11_SB_TESSELLATOR_PARTITIONING_INTEGER:
+    return Partitioning::Integer;
+  case D3D11_SB_TESSELLATOR_PARTITIONING_POW2:
+    return Partitioning::Pow2;
+  case D3D11_SB_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD:
+    return Partitioning::FractionalOdd;
+  case D3D11_SB_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN:
+    return Partitioning::FractionalEven;
+  default:
+    break;
+  }
+
+  IFTBOOL(false, DXC_E_INCORRECT_DXBC);
+  return Partitioning::Undefined;
+}
+
+// Converts a DXBC tessellator output primitive to the DXIL one of the same
+// name. Any other value reaches the IFTBOOL(false, DXC_E_INCORRECT_DXBC)
+// failure path below.
+DXIL::TessellatorOutputPrimitive GetTessellatorOutputPrimitive(D3D11_SB_TESSELLATOR_OUTPUT_PRIMITIVE TessOutputPrimitive) {
+  typedef DXIL::TessellatorOutputPrimitive OutPrim;
+
+  switch (TessOutputPrimitive) {
+  case D3D11_SB_TESSELLATOR_OUTPUT_UNDEFINED:
+    return OutPrim::Undefined;
+  case D3D11_SB_TESSELLATOR_OUTPUT_POINT:
+    return OutPrim::Point;
+  case D3D11_SB_TESSELLATOR_OUTPUT_LINE:
+    return OutPrim::Line;
+  case D3D11_SB_TESSELLATOR_OUTPUT_TRIANGLE_CW:
+    return OutPrim::TriangleCW;
+  case D3D11_SB_TESSELLATOR_OUTPUT_TRIANGLE_CCW:
+    return OutPrim::TriangleCCW;
+  default:
+    break;
+  }
+
+  IFTBOOL(false, DXC_E_INCORRECT_DXBC);
+  return OutPrim::Undefined;
+}
+
+} // namespace DXBC
+
+
+
+//------------------------------------------------------------------------------
+//
+//  Asserts to match DXBC and DXIL constant values.
+//
+using namespace DXIL;
+
+// Shared diagnostic text for the compile-time checks below. Each check pins a
+// DXIL constant (or a D3D_* value) to the DXBC value it is required to equal.
+#define MSG "Constant value mismatch between DXBC and DXIL"
+
+static_assert(kMaxTempRegCount             == D3D11_COMMONSHADER_TEMP_REGISTER_COUNT, MSG);
+static_assert(kMaxCBufferSize              == D3D10_REQ_CONSTANT_BUFFER_ELEMENT_COUNT, MSG);
+
+static_assert((int)DxilSampler::SamplerKind::Default     == D3D10_SB_SAMPLER_MODE_DEFAULT, MSG);
+static_assert((int)DxilSampler::SamplerKind::Comparison  == D3D10_SB_SAMPLER_MODE_COMPARISON, MSG);
+static_assert((int)DxilSampler::SamplerKind::Mono        == D3D10_SB_SAMPLER_MODE_MONO, MSG);
+
+static_assert(D3D10_SB_4_COMPONENT_X       == 0, MSG);
+static_assert(D3D10_SB_4_COMPONENT_Y       == 1, MSG);
+static_assert(D3D10_SB_4_COMPONENT_Z       == 2, MSG);
+static_assert(D3D10_SB_4_COMPONENT_W       == 3, MSG);
+
+static_assert(D3D_MIN_PRECISION_DEFAULT    == D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT, MSG);
+static_assert(D3D_MIN_PRECISION_FLOAT_16   == D3D11_SB_OPERAND_MIN_PRECISION_FLOAT_16, MSG);
+static_assert(D3D_MIN_PRECISION_FLOAT_2_8  == D3D11_SB_OPERAND_MIN_PRECISION_FLOAT_2_8, MSG);
+static_assert(D3D_MIN_PRECISION_SINT_16    == D3D11_SB_OPERAND_MIN_PRECISION_SINT_16, MSG);
+static_assert(D3D_MIN_PRECISION_UINT_16    == D3D11_SB_OPERAND_MIN_PRECISION_UINT_16, MSG);
+
+// MSG is a very generic name and is only needed by the checks above; drop it
+// so it cannot collide with anything that ends up in the same translation unit.
+#undef MSG
+
+} // namespace hlsl

+ 191 - 0
projects/dxilconv/lib/DxbcConverter/DxbcUtil.h

@@ -0,0 +1,191 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxbcUtil.h                                                                //
+// Copyright (c) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Utilities to convert from DXBC to DXIL.                                   //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+
+#include "dxc/DXIL/DxilShaderModel.h"
+#include "dxc/DXIL/DxilSemantic.h"
+#include "dxc/DXIL/DxilInterpolationMode.h"
+#include "dxc/DXIL/DxilCompType.h"
+#include "dxc/DXIL/DxilSampler.h"
+#include "dxc/DXIL/DxilResource.h"
+#include "dxc/DXIL/DxilConstants.h"
+
+namespace llvm {
+class Type;
+class LLVMContext;
+class Value;
+class AtomicRMWInst;
+enum AtomicRMWInst::BinOp;
+}
+
+#define DXASSERT_DXBC(__exp) DXASSERT(__exp, "otherwise incorrect assumption about DXBC")
+
+
+namespace hlsl {
+
+namespace DXBC {
+
+// Width of DXBC vector operand.
+const BYTE kWidth = 4;
+// DXBC mask with all active components.
+const BYTE kAllCompMask = 0x0F;
+
+ShaderModel::Kind GetShaderModelKind(D3D10_SB_TOKENIZED_PROGRAM_TYPE Type);
+
+// Query DXBC shader flags.
+bool IsFlagDisableOptimizations(unsigned Flags);
+bool IsFlagDisableMathRefactoring(unsigned Flags);
+bool IsFlagEnableDoublePrecision(unsigned Flags);
+bool IsFlagForceEarlyDepthStencil(unsigned Flags);
+bool IsFlagEnableRawAndStructuredBuffers(unsigned Flags);
+bool IsFlagEnableMinPrecision(unsigned Flags);
+bool IsFlagEnableDoubleExtensions(unsigned Flags);
+bool IsFlagEnableMSAD(unsigned Flags);
+bool IsFlagAllResourcesBound(unsigned Flags);
+
+InterpolationMode::Kind GetInterpolationModeKind(D3D_INTERPOLATION_MODE Mode);
+
+D3D10_SB_OPERAND_TYPE GetOperandRegType(Semantic::Kind Kind, bool IsOutput);
+
+DxilResource::Kind GetResourceKind(D3D10_SB_RESOURCE_DIMENSION ResType);
+BYTE GetNumResCoords(DxilResource::Kind ResKind);
+BYTE GetNumResOffsets(DxilResource::Kind ResKind);
+
+CompType GetCompType(D3D_REGISTER_COMPONENT_TYPE CompTy);
+CompType GetCompTypeWithMinPrec(D3D_REGISTER_COMPONENT_TYPE BaseCompTy, D3D11_SB_OPERAND_MIN_PRECISION MinPrec);
+CompType GetCompTypeWithMinPrec(CompType BaseCompTy, D3D11_SB_OPERAND_MIN_PRECISION MinPrec);
+CompType GetCompTypeFromMinPrec(D3D11_SB_OPERAND_MIN_PRECISION MinPrec, CompType DefaultPrecCompType);  // 16-bit type for a 16-bit modifier, otherwise derived from DefaultPrecCompType.
+
+CompType GetResCompType(D3D10_SB_RESOURCE_RETURN_TYPE CompTy);  // UNORM and SNORM are reported as plain F32.
+CompType GetDeclResCompType(D3D10_SB_RESOURCE_RETURN_TYPE CompTy);  // UNORM and SNORM keep their UNormF32 / SNormF32 types.
+
+char GetCompName(BYTE c);  // 'x', 'y', 'z' or 'w' for c = 0..3.
+
+DxilSampler::SamplerKind GetSamplerKind(D3D10_SB_SAMPLER_MODE Mode);
+
+unsigned GetRegIndex(unsigned Reg, unsigned Comp);  // Reg * 4 + Comp.
+
+DXIL::AtomicBinOpCode GetAtomicBinOp(D3D10_SB_OPCODE_TYPE DxbcOpCode);  // Invalid for the compare-and-swap opcodes.
+llvm::AtomicRMWInst::BinOp GetLlvmAtomicBinOp(D3D10_SB_OPCODE_TYPE DxbcOpCode);  // BAD_BINOP for the compare-and-swap opcodes.
+bool AtomicBinOpHasReturn(D3D10_SB_OPCODE_TYPE DxbcOpCode);  // True for the IMM_ATOMIC_* opcodes.
+bool IsCompareExchAtomicBinOp(D3D10_SB_OPCODE_TYPE DxbcOpCode);  // True for ATOMIC_CMP_STORE and IMM_ATOMIC_CMP_EXCH.
+
+bool HasFeedback(D3D10_SB_OPCODE_TYPE OpCode);  // True for the *_FEEDBACK opcodes.
+unsigned GetResourceSlot(D3D10_SB_OPCODE_TYPE OpCode);  // Per-opcode slot 0..4; presumably the resource operand index -- TODO confirm.
+
+DXIL::BarrierMode GetBarrierMode(bool bSyncThreadGroup, bool bUAVFenceGlobal, 
+                                 bool bUAVFenceThreadGroup, bool bTGSMFence);  // ORs together the BarrierMode bit of each set flag.
+
+DXIL::InputPrimitive GetInputPrimitive(D3D10_SB_PRIMITIVE Primitive);
+DXIL::PrimitiveTopology GetPrimitiveTopology(D3D10_SB_PRIMITIVE_TOPOLOGY Topology);  // The *_ADJ topologies are not accepted.
+
+const char *GetD3D10SBName(D3D10_SB_NAME D3DName);  // "SV_*" semantic name for a DXBC system name.
+unsigned GetD3D10SBSemanticIndex(D3D10_SB_NAME D3DName);  // Non-zero only for some tessellation factors.
+D3D_REGISTER_COMPONENT_TYPE GetD3DRegCompType(D3D10_SB_NAME D3DName);
+const char *GetSemanticNameFromD3DName(D3D_NAME D3DName);  // "SV_*" semantic name for a D3D_NAME value.
+unsigned GetSemanticIndexFromD3DName(D3D_NAME D3DName);  // UINT_MAX except for the two line tessellation factors.
+
+DXIL::TessellatorDomain GetTessellatorDomain(D3D11_SB_TESSELLATOR_DOMAIN TessDomain);
+DXIL::TessellatorPartitioning GetTessellatorPartitioning(D3D11_SB_TESSELLATOR_PARTITIONING TessPartitioning);
+DXIL::TessellatorOutputPrimitive GetTessellatorOutputPrimitive(D3D11_SB_TESSELLATOR_OUTPUT_PRIMITIVE TessOutputPrimitive);
+
+} // namespace DXBC
+
+
+/// Use this class to represent a DXBC register component mask. The mask is held in a single BYTE (m_Mask).
+class CMask {
+public:
+  CMask();
+  CMask(BYTE Mask);  // NOTE(review): not explicit, so a BYTE converts to CMask implicitly -- confirm this is intended.
+  CMask(BYTE c0, BYTE c1, BYTE c2, BYTE c3);
+  CMask(BYTE StartComp, BYTE NumComp);
+
+  BYTE ToByte() const;
+
+  static bool IsSet(BYTE Mask, BYTE c);
+  bool IsSet(BYTE c) const;
+  void Set(BYTE c);
+
+  CMask operator|(const CMask &o);  // NOTE(review): not const-qualified, so the left operand cannot be a const CMask.
+
+  BYTE GetNumActiveComps() const;
+  BYTE GetNumActiveRangeComps() const;
+  bool IsZero() const { return GetNumActiveComps() == 0; }  // True when GetNumActiveComps() reports no active component.
+
+  BYTE GetFirstActiveComp() const;
+
+  static BYTE MakeMask(BYTE c0, BYTE c1, BYTE c2, BYTE c3);
+  static CMask MakeXYZWMask();
+  static CMask MakeFirstNCompMask(BYTE n);
+  static CMask MakeCompMask(BYTE Component);
+  static CMask MakeXMask();
+
+  static bool IsValidDoubleMask(const CMask &Mask);
+  static CMask GetMaskForDoubleOperation(const CMask &Mask);
+
+  static CMask FromDXBC(const unsigned DxbcMask);
+
+protected:
+  BYTE m_Mask;  // Presumably one bit per component, matching DXBC::kAllCompMask (0x0F) -- TODO confirm.
+};
+
+
+/// Use this class to pass around DXBC register component values: one llvm::Value pointer per component.
+class OperandValue {
+  friend class OperandValueHelper;  // The helper accesses this class's private members.
+  typedef llvm::Value * PValue;
+  PValue m_pVal[DXBC::kWidth];  // Indexed by component; DXBC::kWidth entries.
+public:
+  OperandValue();
+  PValue &operator[](BYTE c);  // Mutable access to the value of component c.
+  const PValue &operator[](BYTE c) const;  // Read-only access to the value of component c.
+};
+
+
+/// \brief Use this one-time-iterator class to set up component values of input operands,
+/// replicating the same value to all components with the same swizzled name.
+///
+/// After creation, an instance serves as an iterator over the unique
+/// components, setting their values in the OperandValue instance.
+/// After the iterator is done, the instance is not usable anymore.
+///
+/// Usage:
+///    OperandValueHelper OVH(OpVal, Mask, Swizzle);
+///    for (; !OVH.IsDone(); OVH.Advance()) {
+///      BYTE Comp = OVH.GetComp();
+///      ...  // Create llvm::Value *pVal
+///      OVH.SetValue(pVal); // for all components with the same swizzle name
+///    }
+class OperandValueHelper {
+public:
+  OperandValueHelper();
+  OperandValueHelper(OperandValue &OpValue, const CMask &Mask, const D3D10ShaderBinary::COperandBase &O);
+
+  /// Returns the current component, i.e. the one the iterator is positioned on among those active with respect to Mask.
+  BYTE GetComp() const;
+  /// Returns true if there are no more active components.
+  bool IsDone() const;
+  /// Advances the iterator to the next unique, active component.
+  void Advance();
+  /// Sets the value of all active components with the same swizzle name in OperandValue OpValue.
+  void SetValue(llvm::Value *pValue);
+
+private:
+  static const BYTE kBadComp = 0xFF;  // Presumably marks an unused m_Components entry -- TODO confirm in the .cpp.
+  OperandValue *m_pOpValue;
+  BYTE m_Components[DXBC::kWidth];
+  BYTE m_Index;
+
+  void Initialize(const CMask &Mask, const BYTE CompSwizzle[DXBC::kWidth]);
+};
+
+} // namespace hlsl

+ 9 - 0
projects/dxilconv/lib/DxilConvPasses/CMakeLists.txt

@@ -0,0 +1,9 @@
+# Build DxilConvPasses.lib.
+#
+# Declares the DxilConvPasses library target from the pass sources below.
+# add_dxilconv_project_library is a project-defined helper (not defined in
+# this file); see the dxilconv CMake set-up for its exact behavior.
+
+add_dxilconv_project_library(DxilConvPasses
+  NormalizeDxil.cpp
+  ScopeNestedCFG.cpp
+  InitializePasses.cpp
+  ScopeNestInfo.cpp
+  DxilCleanup.cpp
+)

+ 1334 - 0
projects/dxilconv/lib/DxilConvPasses/DxilCleanup.cpp

@@ -0,0 +1,1334 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxilCleanup.cpp                                                           //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Optimization of DXIL after conversion from DXBC.                          //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+//===----------------------------------------------------------------------===//
+//                    DXIL Cleanup Transformation
+//===----------------------------------------------------------------------===//
+//
+// The pass cleans up DXIL obtained after conversion from DXBC.
+// Essentially, the pass constructs efficient SSA for DXBC r-registers and
+// performs the following:
+//   1. Removes TempRegStore/TempRegLoad calls, replacing DXBC registers with
+//      either temporary or global LLVM values.
+//   2. Minimizes the number of bitcasts induced by the lack of types in DXBC.
+//   3. Removes helper operations to support DXBC conditionals, translated to i1.
+//   4. Recovers doubles from pairs of 32-bit DXBC registers.
+//   5. Removes MinPrecXRegLoad and MinPrecXRegStore for DXBC indexable,
+//      min-precision x-registers.
+//
+// Clarification of important algorithmic decisions:
+//   1. A live range (LR) is all defs connected via phi-nodes. A straightforward
+//      recursive algorithm is used to collect LR's set of defs.
+//   2. Live ranges are "connected" to other live ranges via DXIL bitcasts.
+//      This creates a bitcast graph.
+//   3. Live ranges are assigned types based on the number of float (F) or
+//      integer (I) defs. A bitcast def initially has an unknown type (U).
+//      Each LR is assigned type only once. LRs are processed in dynamic order 
+//      biased towards LRs with known types, e.g., numF > numI + numU.
+//      When a LR is assigned final type, emanating bitcasts become "resolved"
+//      and contribute desired type to the neighboring LRs.
+//   4. After all LRs are processed, each LR is assigned final type based on 
+//      the number of F and I defs. If type changed from the initial assumption,
+//      the code is rewritten accordingly: new bitcasts are inserted for 
+//      correctness.
+//   5. After every LR type is finalized, chains of bitcasts are cleaned up.
+//   6. The algorithm splits 16- and 32-bit LRs.
+//   7. Registers that are used in an entry and another subroutine are 
+//      represented as global variables.
+//
+
+#include "DxilConvPasses/DxilCleanup.h"
+#include "dxc/Support/Global.h"
+#include "dxc/DXIL/DxilModule.h"
+#include "dxc/DXIL/DxilOperations.h"
+#include "dxc/DXIL/DxilInstructions.h"
+
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
+
+#include <utility>
+#include <vector>
+#include <set>
+#include <queue>
+#include <algorithm>
+
+using namespace llvm;
+using namespace llvm::legacy;
+using namespace hlsl;
+using std::string;
+using std::vector;
+using std::set;
+using std::pair;
+
+#define DXILCLEANUP_DBG   0
+
+#define DEBUG_TYPE "dxilcleanup"
+
+#if DXILCLEANUP_DBG
+// Debug-only helper: prints the banner on its own line, followed by the
+// textual IR of module M, to stdout.
+static void debugprint(const char *banner, Module &M) {
+  std::string text;
+  {
+    raw_string_ostream stream(text);
+    stream << banner << "\n";
+    M.print(stream, nullptr);
+    stream.flush();
+  }
+  std::puts(text.c_str());
+}
+#endif
+
+
+namespace DxilCleanupNS {
+
+/// Use this class to optimize DXIL after conversion from DXBC.
+///
+/// The pass is a ModulePass; runOnModule() drives the individual stages
+/// declared in the private section below.
+class DxilCleanup : public ModulePass {
+public:
+  static char ID;
+
+  // All cached pointers start out null; runOnModule() fills them in.
+  DxilCleanup()
+      : ModulePass(ID), m_pCtx(nullptr), m_pModule(nullptr), m_pDxilModule(nullptr) {
+    initializeDxilCleanupPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnModule(Module &M) override;
+
+  /// A live range: a set of defs plus per-type def counters and the bitcast
+  /// edges to other live ranges (keyed by live range id).
+  struct LiveRange {
+    unsigned id;
+    SmallVector<Value *, 4> defs;
+    // Maps the id of a neighboring live range to the number of bitcast edges.
+    SmallDenseMap<unsigned, unsigned, 4> bitcastMap;
+    unsigned numI;
+    unsigned numF;
+    unsigned numU;
+    Type *pNewType;
+    LiveRange() : id(0), numI(0), numF(0), numU(0), pNewType(nullptr) {}
+    LiveRange &operator=(const LiveRange &) = delete;
+
+    // These constructors cannot be deleted, because vector depends on them,
+    // even if they are never triggered. So assert if they are hit instead.
+    // Initializers are listed in member declaration order.
+    LiveRange(const LiveRange &other)
+      : id(other.id), defs(other.defs), bitcastMap(other.bitcastMap),
+        numI(other.numI), numF(other.numF), numU(other.numU), pNewType(other.pNewType)
+    { DXASSERT_NOMSG(false); }
+    LiveRange(LiveRange &&other)
+      : id(other.id), defs(std::move(other.defs)), bitcastMap(std::move(other.bitcastMap)),
+        numI(other.numI), numF(other.numF), numU(other.numU), pNewType(other.pNewType)
+    { DXASSERT_NOMSG(false); }
+
+    unsigned GetCaseNumber() const;
+    void GuessType(LLVMContext &Ctx);
+    bool operator<(const LiveRange &other) const;
+  };
+
+private:
+  // Alignment applied to the variables declared for r-registers.
+  const unsigned kRegCompAlignment = 4;
+
+  // Handles cached by runOnModule() for the duration of a run.
+  LLVMContext *m_pCtx;
+  Module *m_pModule;
+  DxilModule *m_pDxilModule;
+
+  // All live ranges, indexed by live range id.
+  vector<LiveRange> m_LiveRanges;
+  // Maps a def to the id of the live range that contains it.
+  DenseMap<Value *, unsigned> m_LiveRangeMap;
+
+  // x-register (indexable register) declaration retyping.
+  void OptimizeIdxRegDecls();
+  bool OptimizeIdxRegDecls_CollectUsage(Value *pDecl, unsigned &numF, unsigned &numI);
+  bool OptimizeIdxRegDecls_CollectUsageForUser(User *U, bool bFlt, bool bInt, unsigned &numF, unsigned &numI);
+  Type *OptimizeIdxRegDecls_DeclareType(Type *pOldType);
+  void OptimizeIdxRegDecls_ReplaceDecl(Value *pOldDecl, Value *pNewDecl, vector<Instruction*> &InstrToErase);
+  void OptimizeIdxRegDecls_ReplaceGEPUse(Value *pOldGEPUser, Value *pNewGEP, Value *pOldDecl, Value *pNewDecl, vector<Instruction*> &InstrToErase);
+
+  // Pipeline stages, in the order runOnModule() invokes them.
+  void RemoveRegLoadStore();
+  void ConstructSSA();
+  void CollectLiveRanges();
+  void CountLiveRangeRec(unsigned LRId, Instruction *pInst);
+  void RecoverLiveRangeRec(LiveRange &LR, Instruction *pInst);
+  void InferLiveRangeTypes();
+  void ChangeLiveRangeTypes();
+  void CleanupPatterns();
+  void RemoveDeadCode();
+
+  // Shared helpers.
+  Value *CastValue(Value *pValue, Type *pToType, Instruction *pOrigInst);
+  bool IsDxilBitcast(Value *pValue);
+  ArrayType *GetDeclArrayType(Type *pSrcType);
+  Type *GetDeclScalarType(Type *pSrcType);
+};
+
+// Pass identifier handed to the ModulePass base class by the constructor.
+char DxilCleanup::ID = 0;
+
+//------------------------------------------------------------------------------
+//
+//  DxilCleanup methods.
+//
+// Pass entry point. Caches the module, its context and its DxilModule, then
+// runs every cleanup stage in a fixed order. Always reports the module as
+// modified.
+bool DxilCleanup::runOnModule(Module &M) {
+  m_pCtx = &M.getContext();
+  m_pModule = &M;
+  m_pDxilModule = &M.GetOrCreateDxilModule();
+
+  // Indexable (x) register declarations.
+  OptimizeIdxRegDecls();
+
+  // r-registers: intrinsics -> memory variables -> SSA values.
+  RemoveRegLoadStore();
+  ConstructSSA();
+
+  // Live range discovery and typing.
+  CollectLiveRanges();
+  InferLiveRangeTypes();
+  ChangeLiveRangeTypes();
+
+  // Final tidy-up.
+  CleanupPatterns();
+  RemoveDeadCode();
+
+  return true;
+}
+
+// Optimizes declarations of indexable (x) registers in three steps:
+//   1. an internal, non-constant global used by exactly one function becomes
+//      an alloca in that function's entry block;
+//   2. entry-block allocas whose usage is dominated by the "other" type
+//      (float vs. integer) are re-declared with that type;
+//   3. the same retyping is applied to the remaining global declarations.
+// Throws (via IFT) if a declaration has a user the rewrite does not support.
+void DxilCleanup::OptimizeIdxRegDecls() {
+  // A declaration is retyped when its current scalar type disagrees with the
+  // dominant usage: float decl with more integer uses, or integer decl with
+  // at least as many float uses.
+  auto ShouldFlipType = [](Type *pScalarType, unsigned numF, unsigned numI) -> bool {
+    return (pScalarType->isFloatingPointTy() && numI > numF) ||
+           (pScalarType->isIntegerTy() && numF >= numI);
+  };
+
+  // 1. Convert global x-register decl into alloca if used only in one function.
+  for (auto itGV = m_pModule->global_begin(), endGV = m_pModule->global_end(); itGV != endGV; ) {
+    GlobalVariable *GV = itGV;
+    ++itGV;  // Advance first: GV may be erased below.
+    if (GV->isConstant() || GV->getLinkage() != GlobalValue::InternalLinkage) continue;
+    PointerType *pPtrType = dyn_cast<PointerType>(GV->getType());
+    if (!pPtrType || pPtrType->getAddressSpace() != DXIL::kDefaultAddrSpace) continue;
+
+    Type *pElemType = pPtrType->getElementType();
+
+    // Find the single function that contains all users; any non-instruction
+    // user or a user in a second function disqualifies the global.
+    Function *F = nullptr;
+    for (User *U : GV->users()) {
+      Instruction *I = dyn_cast<Instruction>(U);
+      if (!I || (F && I->getParent()->getParent() != F)) {
+        F = nullptr;
+        break;
+      }
+
+      F = I->getParent()->getParent();
+    }
+
+    if (F) {
+      // Promote to alloca.
+      Instruction *pAnchor = F->getEntryBlock().begin();
+      AllocaInst *AI = new AllocaInst(pElemType, nullptr, GV->getName(), pAnchor);
+      AI->setAlignment(GV->getAlignment());
+      GV->replaceAllUsesWith(AI);
+      GV->eraseFromParent();
+    }
+  }
+
+  // 2. Collect x-register alloca usage stats and change type, if profitable.
+  for (auto itF = m_pModule->begin(), endFn = m_pModule->end(); itF != endFn; ++itF) {
+    Function *F = itF;
+    if (F->empty()) continue;
+    BasicBlock *pEntryBB = &F->getEntryBlock();
+    // Erasure is deferred so that the entry-block iteration stays valid.
+    vector<Instruction*> InstrToErase;
+
+    for (auto itInst = pEntryBB->begin(), endInst = pEntryBB->end(); itInst != endInst; ++itInst) {
+      AllocaInst *AI = dyn_cast<AllocaInst>(itInst);
+      if (!AI) continue;
+
+      Type *pScalarType = GetDeclScalarType(AI->getType());
+      if (pScalarType != Type::getFloatTy(*m_pCtx) && pScalarType != Type::getHalfTy(*m_pCtx) &&
+          pScalarType != Type::getInt32Ty(*m_pCtx) && pScalarType != Type::getInt16Ty(*m_pCtx)) {
+        continue;
+      }
+
+      // Collect usage stats and potentially change decl type.
+      unsigned numF, numI;
+      if (!OptimizeIdxRegDecls_CollectUsage(AI, numF, numI)) continue;
+      if (!ShouldFlipType(pScalarType, numF, numI)) continue;
+
+      Type *pNewType = OptimizeIdxRegDecls_DeclareType(AI->getType());
+      if (!pNewType) continue;
+
+      // Replace alloca.
+      AllocaInst *AI2 = new AllocaInst(pNewType, nullptr, AI->getName(), AI);
+      AI2->setAlignment(AI->getAlignment());
+      OptimizeIdxRegDecls_ReplaceDecl(AI, AI2, InstrToErase);
+      InstrToErase.emplace_back(AI);
+    }
+
+    for (auto *I : InstrToErase) {
+      I->eraseFromParent();
+    }
+  }
+
+  // 3. Collect x-register global decl usage stats and change type, if profitable.
+  //    Candidates are gathered first because replacing a decl adds a new
+  //    global to the module's global list.
+  llvm::SmallVector<GlobalVariable*, 4> GVWorklist;
+  for (auto itGV = m_pModule->global_begin(), endGV = m_pModule->global_end(); itGV != endGV; ) {
+    GlobalVariable *pOldGV = itGV;
+    ++itGV;
+    if (pOldGV->isConstant()) continue;
+    PointerType *pOldPtrType = dyn_cast<PointerType>(pOldGV->getType());
+    if (!pOldPtrType || pOldPtrType->getAddressSpace() != DXIL::kDefaultAddrSpace) continue;
+
+    unsigned numF, numI;
+    if (!OptimizeIdxRegDecls_CollectUsage(pOldGV, numF, numI)) continue;
+
+    if (ShouldFlipType(GetDeclScalarType(pOldGV->getType()), numF, numI)) {
+      GVWorklist.push_back(pOldGV);
+    }
+  }
+
+  for (GlobalVariable *pOldGV : GVWorklist) {
+    Type *pNewType = OptimizeIdxRegDecls_DeclareType(pOldGV->getType());
+    if (!pNewType) continue;
+
+    // Replace global decl. The type of a global is always a pointer, so
+    // cast<> (which asserts) is used rather than an unchecked dyn_cast<>.
+    PointerType *pOldPtrType = cast<PointerType>(pOldGV->getType());
+    GlobalVariable *pNewGV = new GlobalVariable(*m_pModule, pNewType, false, pOldGV->getLinkage(),
+                                                UndefValue::get(pNewType), pOldGV->getName(),
+                                                nullptr, pOldGV->getThreadLocalMode(),
+                                                pOldPtrType->getAddressSpace());
+    vector<Instruction*> InstrToErase;
+    OptimizeIdxRegDecls_ReplaceDecl(pOldGV, pNewGV, InstrToErase);
+    for (auto *I : InstrToErase) {
+      I->eraseFromParent();
+    }
+    pOldGV->eraseFromParent();
+  }
+}
+
+// Returns the array type a declaration points to, or nullptr when pSrcType
+// is not a pointer or its pointee is not an array.
+ArrayType *DxilCleanup::GetDeclArrayType(Type *pSrcType) {
+  PointerType *pDeclPtrTy = dyn_cast<PointerType>(pSrcType);
+  return pDeclPtrTy ? dyn_cast<ArrayType>(pDeclPtrTy->getElementType())
+                    : nullptr;
+}
+
+// Returns the scalar type behind a declaration: the pointee type, or the
+// pointee's element type when the pointee is an array. Returns nullptr when
+// pSrcType is not a pointer.
+Type *DxilCleanup::GetDeclScalarType(Type *pSrcType) {
+  PointerType *pDeclPtrTy = dyn_cast<PointerType>(pSrcType);
+  if (pDeclPtrTy == nullptr)
+    return nullptr;
+
+  Type *pPointee = pDeclPtrTy->getElementType();
+  ArrayType *pAsArray = dyn_cast<ArrayType>(pPointee);
+  return pAsArray ? pAsArray->getArrayElementType() : pPointee;
+}
+
+// Computes the retyped counterpart of a declaration type: float <-> i32 and
+// half <-> i16, keeping the array length when the declaration is an array.
+// Throws DXC_E_OPTIMIZATION_FAILED (via IFT) for any other scalar type.
+Type *DxilCleanup::OptimizeIdxRegDecls_DeclareType(Type *pOldType) {
+  Type *const pF32Ty = Type::getFloatTy(*m_pCtx);
+  Type *const pF16Ty = Type::getHalfTy(*m_pCtx);
+  Type *const pI32Ty = Type::getInt32Ty(*m_pCtx);
+  Type *const pI16Ty = Type::getInt16Ty(*m_pCtx);
+
+  // Pick the same-width scalar of the opposite kind.
+  Type *const pOldScalarTy = GetDeclScalarType(pOldType);
+  Type *pFlippedTy = nullptr;
+  if (pOldScalarTy == pF32Ty) {
+    pFlippedTy = pI32Ty;
+  } else if (pOldScalarTy == pF16Ty) {
+    pFlippedTy = pI16Ty;
+  } else if (pOldScalarTy == pI32Ty) {
+    pFlippedTy = pF32Ty;
+  } else if (pOldScalarTy == pI16Ty) {
+    pFlippedTy = pF16Ty;
+  } else {
+    IFT(DXC_E_OPTIMIZATION_FAILED);
+  }
+
+  // Re-wrap in an array of the original length, if the decl was an array.
+  if (ArrayType *pOldArrayTy = GetDeclArrayType(pOldType)) {
+    return ArrayType::get(pFlippedTy, pOldArrayTy->getArrayNumElements());
+  }
+  return pFlippedTy;
+}
+
+// Counts float-like (numF) and integer-like (numI) uses of declaration pDecl.
+// Both counters are reset to zero first. Returns false when the declaration
+// is not a float/half/i32/i16 (array) pointer, or when it has a user other
+// than a GEP (instruction or constant expression) whose own users are all
+// accepted by OptimizeIdxRegDecls_CollectUsageForUser, or a bitcast
+// instruction to double*.
+bool DxilCleanup::OptimizeIdxRegDecls_CollectUsage(Value *pDecl, unsigned &numF, unsigned &numI) {
+  numF = 0;
+  numI = 0;
+
+  Type *pDeclScalarTy = GetDeclScalarType(pDecl->getType());
+  if (pDeclScalarTy == nullptr)
+    return false;
+
+  const bool bFlt = pDeclScalarTy == Type::getFloatTy(*m_pCtx) ||
+                    pDeclScalarTy == Type::getHalfTy(*m_pCtx);
+  const bool bInt = pDeclScalarTy == Type::getInt32Ty(*m_pCtx) ||
+                    pDeclScalarTy == Type::getInt16Ty(*m_pCtx);
+  if (!bFlt && !bInt)
+    return false;
+
+  // Tallies every user of an address computed from the declaration.
+  auto TallyAddressUsers = [&](Value *pAddr) -> bool {
+    for (User *pAddrUser : pAddr->users()) {
+      if (!OptimizeIdxRegDecls_CollectUsageForUser(pAddrUser, bFlt, bInt, numF, numI))
+        return false;
+    }
+    return true;
+  };
+
+  for (User *pDeclUser : pDecl->users()) {
+    if (isa<GetElementPtrInst>(pDeclUser) || isa<GEPOperator>(pDeclUser)) {
+      if (!TallyAddressUsers(pDeclUser))
+        return false;
+      continue;
+    }
+
+    // The only other accepted user is a bitcast instruction to double*.
+    BitCastInst *pBC = dyn_cast<BitCastInst>(pDeclUser);
+    if (!pBC || pBC->getType() != Type::getDoublePtrTy(*m_pCtx))
+      return false;
+  }
+
+  return true;
+}
+
+// Updates the usage counters for one user U of an x-register element address.
+//  - Load: each user of the loaded value is tallied; a DXIL bitcast user
+//    counts towards the opposite kind of the declaration.
+//  - Store: tallied once; a stored value that is a DXIL bitcast counts
+//    towards the opposite kind of the declaration.
+// bFlt/bInt state whether the declaration is currently float-like or
+// integer-like. Returns false for any other kind of user.
+bool DxilCleanup::OptimizeIdxRegDecls_CollectUsageForUser(User *U, bool bFlt, bool bInt, unsigned &numF, unsigned &numI) {
+  // Adds one use: to the declared kind normally, to the other kind when the
+  // value crosses a DXIL bitcast.
+  auto Tally = [&](bool bThroughBitcast) {
+    if (bFlt) ++(bThroughBitcast ? numI : numF);
+    if (bInt) ++(bThroughBitcast ? numF : numI);
+  };
+
+  if (LoadInst *pLoad = dyn_cast<LoadInst>(U)) {
+    for (User *pLoadUser : pLoad->users())
+      Tally(IsDxilBitcast(pLoadUser));
+    return true;
+  }
+
+  if (StoreInst *pStore = dyn_cast<StoreInst>(U)) {
+    Tally(IsDxilBitcast(pStore->getValueOperand()));
+    return true;
+  }
+
+  return false;
+}
+
+// Re-creates every use of pOldDecl on top of pNewDecl (the same declaration
+// with the float/integer scalar type flipped). Superseded instructions are
+// appended to InstrToErase; the caller erases them. Throws
+// DXC_E_OPTIMIZATION_FAILED (via IFT) for an unsupported user.
+//
+// GEP users (instruction or constant expression):
+//   Case 1. Load.
+//     %44 = getelementptr [24 x float], [24 x float]* %dx.v32.x0, i32 0, i32 %43
+//     %45 = load float, float* %44, align 4
+//     %46 = add float %45, ...
+//   becomes
+//     %44 = getelementptr [24 x i32], [24 x i32]* %dx.v32.x0, i32 0, i32 %43
+//     %45 = load i32, i32* %44, align 4
+//     %t1 = call float @dx.op.bitcastI32toF32 i32 %45
+//     %46 = add float %t1, ...
+//
+//   Case 2. Store.
+//     %31 = add float ...
+//     %32 = getelementptr [24 x float], [24 x float]* %dx.v32.x0, i32 0, i32 16
+//     store float %31, float* %32, align 4
+//   becomes
+//     %31 = add float ...
+//     %32 = getelementptr [24 x i32], [24 x i32]* %dx.v32.x0, i32 0, i32 16
+//     %t1 = call i32 @dx.op.bitcastF32toI32 float %31
+//     store i32 %t1, i32* %32, align 4
+//
+// Bitcast users:
+//     %1 = bitcast [24 x float]* %dx.v32.x0 to double*
+//   becomes
+//     %1 = bitcast [24 x i32]* %dx.v32.x0 to double*
+void DxilCleanup::OptimizeIdxRegDecls_ReplaceDecl(Value *pOldDecl, Value *pNewDecl,
+                                                  vector<Instruction*> &InstrToErase) {
+  for (User *pDeclUser : pOldDecl->users()) {
+    if (GetElementPtrInst *pOldGEPInst = dyn_cast<GetElementPtrInst>(pDeclUser)) {
+      // Same indices, new base; the new GEP is placed right after the old one.
+      SmallVector<Value *, 4> Indices(pOldGEPInst->idx_begin(), pOldGEPInst->idx_end());
+      GetElementPtrInst *pNewGEPInst =
+          GetElementPtrInst::Create(nullptr, pNewDecl, Indices, pOldGEPInst->getName(),
+                                    pOldGEPInst->getNextNode());
+      for (User *pGEPUser : pOldGEPInst->users()) {
+        OptimizeIdxRegDecls_ReplaceGEPUse(pGEPUser, pNewGEPInst, pOldDecl, pNewDecl, InstrToErase);
+      }
+      InstrToErase.emplace_back(pOldGEPInst);
+      continue;
+    }
+
+    if (GEPOperator *pOldGEPOp = dyn_cast<GEPOperator>(pDeclUser)) {
+      // Constant-expression GEP: rebuild it as a constant over the new decl.
+      SmallVector<Value *, 4> Indices(pOldGEPOp->idx_begin(), pOldGEPOp->idx_end());
+      Type *pNewPointeeTy = cast<PointerType>(pNewDecl->getType())->getElementType();
+      Constant *pNewGEPConst = ConstantExpr::getGetElementPtr(
+          pNewPointeeTy, cast<Constant>(pNewDecl), Indices, pOldGEPOp->isInBounds());
+      GEPOperator *pNewGEPOp = cast<GEPOperator>(pNewGEPConst);
+      for (User *pGEPUser : pOldGEPOp->users()) {
+        OptimizeIdxRegDecls_ReplaceGEPUse(pGEPUser, pNewGEPOp, pOldDecl, pNewDecl, InstrToErase);
+      }
+      continue;
+    }
+
+    if (BitCastInst *pOldCast = dyn_cast<BitCastInst>(pDeclUser)) {
+      // Same destination type, new source declaration.
+      BitCastInst *pNewCast = new BitCastInst(pNewDecl, pOldCast->getType(),
+                                              pOldCast->getName(), pOldCast->getNextNode());
+      pOldCast->replaceAllUsesWith(pNewCast);
+      InstrToErase.emplace_back(pOldCast);
+      continue;
+    }
+
+    IFT(DXC_E_OPTIMIZATION_FAILED);
+  }
+}
+
+// Rewrites one load or store that went through a GEP of the old declaration
+// so that it goes through pNewGEP instead.
+//  - Load: a new load of the new scalar type is inserted after the old one,
+//    its result is cast back to the old scalar type, and the cast replaces
+//    all uses of the old load.
+//  - Store: the stored value is cast to the new scalar type and a new store
+//    is inserted after the old one.
+// The superseded instruction is queued in InstrToErase. Throws
+// DXC_E_OPTIMIZATION_FAILED (via IFT) for any other kind of user.
+void DxilCleanup::OptimizeIdxRegDecls_ReplaceGEPUse(Value *pOldGEPUser, Value *pNewGEP,
+                                                    Value *pOldDecl, Value *pNewDecl,
+                                                    vector<Instruction*> &InstrToErase) {
+  if (LoadInst *pLoad = dyn_cast<LoadInst>(pOldGEPUser)) {
+    Type *pOldScalarTy = GetDeclScalarType(pOldDecl->getType());
+    LoadInst *pReload = new LoadInst(pNewGEP, pLoad->getName(), pLoad->getNextNode());
+    pReload->setAlignment(pLoad->getAlignment());
+    Value *pCastBack = CastValue(pReload, pOldScalarTy, pReload->getNextNode());
+    pLoad->replaceAllUsesWith(pCastBack);
+    InstrToErase.emplace_back(pLoad);
+    return;
+  }
+
+  if (StoreInst *pStore = dyn_cast<StoreInst>(pOldGEPUser)) {
+    Type *pNewScalarTy = GetDeclScalarType(pNewDecl->getType());
+    Value *pCastValue = CastValue(pStore->getValueOperand(), pNewScalarTy, pStore);
+    StoreInst *pRestore = new StoreInst(pCastValue, pNewGEP, pStore->getNextNode());
+    pRestore->setAlignment(pStore->getAlignment());
+    InstrToErase.emplace_back(pStore);
+    return;
+  }
+
+  IFT(DXC_E_OPTIMIZATION_FAILED);
+}
+
+// Replaces TempRegLoad/TempRegStore DXIL intrinsics with loads/stores to
+// variables that represent DXBC r-registers:
+//   1. per function, count float/int 32- and 16-bit accesses of each register;
+//   2. declare one variable per register and bit width: an alloca, or an
+//      internal global when the register is also used by another function;
+//   3. rewrite each intrinsic call as a load/store plus a cast where the
+//      accessed type differs from the declared type.
+// Throws DXC_E_OPTIMIZATION_FAILED (via IFT) for an unexpected value type.
+void DxilCleanup::RemoveRegLoadStore() {
+  struct RegRec {
+    unsigned numI32;
+    unsigned numF32;
+    unsigned numI16;
+    unsigned numF16;
+    Value *pDecl32;
+    Value *pDecl16;
+    RegRec() : numI32(0), numF32(0), numI16(0), numF16(0), pDecl32(nullptr), pDecl16(nullptr) {}
+  };
+  struct FuncRec {
+    MapVector<unsigned, RegRec> RegMap;
+    bool bEntry;
+    bool bCallsOtherFunc;
+    FuncRec() : bEntry(false), bCallsOtherFunc(false) {}
+  };
+  MapVector<Function *, FuncRec> FuncMap;
+
+  Type *const pF32Ty = Type::getFloatTy(*m_pCtx);
+  Type *const pI32Ty = Type::getInt32Ty(*m_pCtx);
+  Type *const pF16Ty = Type::getHalfTy(*m_pCtx);
+  Type *const pI16Ty = Type::getInt16Ty(*m_pCtx);
+
+  // 1. For each r-register, collect usage stats.
+  for (auto itF = m_pModule->begin(), endFn = m_pModule->end(); itF != endFn; ++itF) {
+    Function *F = itF;
+    if (F->empty()) continue;
+    DXASSERT_NOMSG(FuncMap.find(F) == FuncMap.end());
+    FuncRec &FR = FuncMap[F];
+
+    // Detect entry.
+    if (F == m_pDxilModule->GetEntryFunction() ||
+        F == m_pDxilModule->GetPatchConstantFunction()) {
+      FR.bEntry = true;
+    }
+
+    for (auto itBB = F->begin(), endBB = F->end(); itBB != endBB; ++itBB) {
+      BasicBlock *BB = itBB;
+
+      for (auto itInst = BB->begin(), endInst = BB->end(); itInst != endInst; ++itInst) {
+        CallInst *CI = dyn_cast<CallInst>(itInst);
+        if (!CI) continue;
+
+        if (!OP::IsDxilOpFuncCallInst(CI)) {
+          FR.bCallsOtherFunc = true;
+          continue;
+        }
+
+        // Obtain register index for TempRegLoad/TempRegStore.
+        // The index is dereferenced unconditionally, so cast<> (asserting)
+        // is used instead of dyn_cast<>.
+        unsigned regIdx = 0;
+        Type *pValType = nullptr;
+        if (DxilInst_TempRegLoad TRL = DxilInst_TempRegLoad(CI)) {
+          regIdx = cast<ConstantInt>(TRL.get_index())->getZExtValue();
+          pValType = CI->getType();
+        } else if (DxilInst_TempRegStore TRS = DxilInst_TempRegStore(CI)) {
+          regIdx = cast<ConstantInt>(TRS.get_index())->getZExtValue();
+          pValType = TRS.get_value()->getType();
+        } else {
+          continue;
+        }
+
+        // Update register usage.
+        RegRec &reg = FR.RegMap[regIdx];
+        if (pValType == pF32Ty) {
+          reg.numF32++;
+        } else if (pValType == pI32Ty) {
+          reg.numI32++;
+        } else if (pValType == pF16Ty) {
+          reg.numF16++;
+        } else if (pValType == pI16Ty) {
+          reg.numI16++;
+        } else {
+          IFT(DXC_E_OPTIMIZATION_FAILED);
+        }
+      }
+    }
+  }
+
+  // 2. Declare local and global variables to represent each r-register.
+  enum class DeclKind { None, Alloca, Global };
+
+  // Creates (or, for globals, finds) the variable "<pPrefix><regIdx>" of type
+  // pDeclType. Returns nullptr for DeclKind::None. The name is materialized
+  // as a std::string: a Twine must not outlive the statement that builds it.
+  auto DeclareReg = [&](Function *F, DeclKind Kind, const char *pPrefix,
+                        unsigned regIdx, Type *pDeclType) -> Value * {
+    if (Kind == DeclKind::None) return nullptr;
+
+    const std::string regName = (Twine(pPrefix) + Twine(regIdx)).str();
+
+    if (Kind == DeclKind::Alloca) {
+      Instruction *pAnchor = F->getEntryBlock().begin();
+      AllocaInst *AI = new AllocaInst(pDeclType, nullptr, regName, pAnchor);
+      AI->setAlignment(kRegCompAlignment);
+      return AI;
+    }
+
+    // Global: reuse the variable if another function already declared it.
+    GlobalVariable *GV = m_pModule->getGlobalVariable(regName, true);
+    if (!GV) {
+      GV = new GlobalVariable(*m_pModule, pDeclType,
+                              false, GlobalValue::InternalLinkage,
+                              UndefValue::get(pDeclType),
+                              regName, nullptr,
+                              GlobalVariable::NotThreadLocal, DXIL::kDefaultAddrSpace);
+    }
+    GV->setAlignment(kRegCompAlignment);
+    return GV;
+  };
+
+  for (auto &itF : FuncMap) {
+    Function *F = itF.first;
+    FuncRec &FR = itF.second;
+
+    for (auto &itReg : FR.RegMap) {
+      unsigned regIdx = itReg.first;
+      RegRec &reg = itReg.second;
+      DXASSERT_NOMSG(reg.pDecl16 == nullptr && reg.pDecl32 == nullptr);
+
+      DeclKind Decl32Kind = (reg.numF32 + reg.numI32) == 0 ? DeclKind::None : DeclKind::Alloca;
+      DeclKind Decl16Kind = (reg.numF16 + reg.numI16) == 0 ? DeclKind::None : DeclKind::Alloca;
+      DXASSERT_NOMSG(Decl32Kind == DeclKind::Alloca || Decl16Kind == DeclKind::Alloca);
+      unsigned numF32 = reg.numF32, numI32 = reg.numI32, numF16 = reg.numF16, numI16 = reg.numI16;
+      if (!FR.bEntry || FR.bCallsOtherFunc) {
+        // Check if register is used in another function.
+        for (auto &itF2 : FuncMap) {
+          Function *F2 = itF2.first;
+          FuncRec &FR2 = itF2.second;
+          if (F2 == F || (FR.bEntry && FR2.bEntry)) continue;
+
+          auto itReg2 = FR2.RegMap.find(regIdx);
+          if (itReg2 == FR2.RegMap.end()) continue;
+
+          RegRec &reg2 = itReg2->second;
+          if (Decl32Kind == DeclKind::Alloca && (reg2.numF32 + reg2.numI32) > 0) {
+            Decl32Kind = DeclKind::Global;
+          }
+          if (Decl16Kind == DeclKind::Alloca && (reg2.numF16 + reg2.numI16) > 0) {
+            Decl16Kind = DeclKind::Global;
+          }
+          numF32 += reg2.numF32;
+          numI32 += reg2.numI32;
+          numF16 += reg2.numF16;
+          numI16 += reg2.numI16;
+        }
+      }
+
+      // Declare variables; the declared type follows the dominant usage,
+      // with ties resolved towards floating point.
+      reg.pDecl32 = DeclareReg(F, Decl32Kind, "dx.v32.r", regIdx,
+                               numF32 >= numI32 ? pF32Ty : pI32Ty);
+      reg.pDecl16 = DeclareReg(F, Decl16Kind, "dx.v16.r", regIdx,
+                               numF16 >= numI16 ? pF16Ty : pI16Ty);
+    }
+  }
+
+  // 3. Replace TempRegLoad/Store with load/store to declared variables.
+  for (auto itFn = m_pModule->begin(), endFn = m_pModule->end(); itFn != endFn; ++itFn) {
+    Function *F = itFn;
+    if (F->empty()) continue;
+    DXASSERT_NOMSG(FuncMap.find(F) != FuncMap.end());
+    FuncRec &FR = FuncMap[F];
+
+    for (auto itBB = F->begin(), endBB = F->end(); itBB != endBB; ++itBB) {
+      BasicBlock *BB = itBB;
+
+      for (auto itInst = BB->begin(), endInst = BB->end(); itInst != endInst; ) {
+        Instruction *CI = itInst;
+
+        if (DxilInst_TempRegLoad TRL = DxilInst_TempRegLoad(CI)) {
+          // Replace TempRegLoad intrinsic with a load.
+          unsigned regIdx = cast<ConstantInt>(TRL.get_index())->getZExtValue();
+          RegRec &reg = FR.RegMap[regIdx];
+
+          Type *pValType = CI->getType();
+          Value *pDecl = (pValType == pF32Ty || pValType == pI32Ty) ? reg.pDecl32 : reg.pDecl16;
+          DXASSERT_NOMSG(pDecl != nullptr);
+
+          LoadInst *LI = new LoadInst(pDecl, nullptr, CI);
+          Value *pBitcastLI = CastValue(LI, pValType, CI);
+
+          CI->replaceAllUsesWith(pBitcastLI);
+          ++itInst;  // Step past CI before erasing it.
+          CI->eraseFromParent();
+        } else if (DxilInst_TempRegStore TRS = DxilInst_TempRegStore(CI)) {
+          // Replace TempRegStore with a store.
+          unsigned regIdx = cast<ConstantInt>(TRS.get_index())->getZExtValue();
+          RegRec &reg = FR.RegMap[regIdx];
+
+          Value *pValue = TRS.get_value();
+          Type *pValType = pValue->getType();
+          Value *pDecl = (pValType == pF32Ty || pValType == pI32Ty) ? reg.pDecl32 : reg.pDecl16;
+          DXASSERT_NOMSG(pDecl != nullptr);
+          Type *pDeclType = cast<PointerType>(pDecl->getType())->getElementType();
+          Value *pBitcastValueToStore = CastValue(pValue, pDeclType, CI);
+
+          // The intrinsic call produces no value, so it has no uses to
+          // redirect; it is simply superseded by the store.
+          new StoreInst(pBitcastValueToStore, pDecl, CI);
+          ++itInst;  // Step past CI before erasing it.
+          CI->eraseFromParent();
+        } else {
+          ++itInst;
+        }
+      }
+    }
+  }
+}
+
+// Constructs SSA for r-register live ranges by running LLVM's
+// promote-memory-to-register pass over the whole module.
+void DxilCleanup::ConstructSSA() {
+#if DXILCLEANUP_DBG
+  // Debug builds only: the module is expected to verify before promotion.
+  DXASSERT_NOMSG(!verifyModule(*m_pModule));
+#endif
+
+  PassManager Mem2RegPM;
+  Mem2RegPM.add(createPromoteMemoryToRegisterPass());
+  Mem2RegPM.run(*m_pModule);
+}
+
+// Note: this two-pass initialization scheme limits the algorithm to handling 2^31 live ranges, instead of 2^32.
+// The top bit of a live range id stored in m_LiveRangeMap serves as a marker:
+// CountLiveRangeRec sets it when it assigns an id to an instruction, and
+// RecoverLiveRangeRec clears it once the instruction has been added to the
+// defs of its LiveRange.
+#define LIVE_RANGE_UNINITIALIZED (((unsigned)1<<31))
+
+// Builds m_LiveRanges and m_LiveRangeMap for every float/integer-typed
+// instruction in the module:
+//   0. count live ranges, tagging each instruction with its (still
+//      uninitialized) live range id, then size m_LiveRanges once;
+//   1. fill in the defs of each live range, clearing the tag;
+//   2. record bitcast edges between live ranges.
+// m_LiveRanges is sized exactly once, before any element is populated, so
+// that LiveRange objects are never copied or moved.
+void DxilCleanup::CollectLiveRanges() {
+  // 0. Count and allocate live ranges.
+  unsigned LiveRangeCount = 0;
+  for (Function &F : *m_pModule) {
+    for (BasicBlock &BB : F) {
+      for (Instruction &Inst : BB) {
+        Instruction *I = &Inst;
+        Type *pType = I->getType();
+
+        if (!pType->isFloatingPointTy() && !pType->isIntegerTy())
+          continue;
+
+        // Already reached through a phi connection of an earlier live range.
+        if (m_LiveRangeMap.find(I) != m_LiveRangeMap.end())
+          continue;
+
+        // Count live range.
+        if (LiveRangeCount & LIVE_RANGE_UNINITIALIZED) {
+          // Too many live ranges for our two-pass initialization scheme.
+          DXASSERT(false, "otherwise, more than 2^31 live ranges!");
+          return;
+        }
+        CountLiveRangeRec(LiveRangeCount, I);
+        LiveRangeCount++;
+      }
+    }
+  }
+  m_LiveRanges.resize(LiveRangeCount);
+
+  // 1. Recover live ranges.
+  unsigned LRId = 0;
+  for (Function &F : *m_pModule) {
+    for (BasicBlock &BB : F) {
+      for (Instruction &Inst : BB) {
+        Instruction *I = &Inst;
+        Type *pType = I->getType();
+
+        if (!pType->isFloatingPointTy() && !pType->isIntegerTy())
+          continue;
+
+        auto it = m_LiveRangeMap.find(I);
+        DXASSERT(it != m_LiveRangeMap.end(), "otherwise, instruction not added to m_LiveRangeMap during counting stage");
+        // A cleared tag means the instruction already belongs to a live range.
+        if (!(it->second & LIVE_RANGE_UNINITIALIZED)) {
+          continue;
+        }
+
+        // Recover a live range.
+        LiveRange &LR = m_LiveRanges[LRId];
+        LR.id = LRId++;
+        RecoverLiveRangeRec(LR, I);
+      }
+    }
+  }
+
+  // 2. Add bitcast edges.
+  for (LiveRange &LR : m_LiveRanges) {
+    for (Value *def : LR.defs) {
+      for (User *U : def->users()) {
+        if (IsDxilBitcast(U)) {
+          DXASSERT_NOMSG(m_LiveRangeMap.find(U) != m_LiveRangeMap.end());
+          DXASSERT(!(m_LiveRangeMap.find(U)->second & LIVE_RANGE_UNINITIALIZED), "otherwise, live range not initialized!");
+          unsigned userLRId = m_LiveRangeMap[U];
+          LR.bitcastMap[userLRId]++;
+        }
+      }
+    }
+  }
+
+#if DXILCLEANUP_DBG
+  // Print live ranges.
+  size_t NumDefs = 0;
+  dbgs() << "Live ranges:\n";
+  for (LiveRange &LR : m_LiveRanges) {
+    NumDefs += LR.defs.size();
+    dbgs() << "id=" << LR.id << ", F=" << LR.numF
+           << ", I=" << LR.numI << ", U=" << LR.numU << ", defs = {";
+    for (Value *D : LR.defs) {
+      dbgs() << "\n";
+      D->dump();
+    }
+    dbgs() << "}, edges = { ";
+    bool bFirst = true;
+    for (auto it : LR.bitcastMap) {
+      if (!bFirst) {
+        dbgs() << ", ";
+      }
+      dbgs() << "<" << it.first << "," << it.second << ">";
+      // Subsequent edges must be preceded by a separator.
+      bFirst = false;
+    }
+    dbgs() << "}\n";
+  }
+  DXASSERT_NOMSG(NumDefs == m_LiveRangeMap.size());
+#endif
+}
+
+// Counting stage helper: tags pInst, and every instruction connected to it
+// through phi nodes (phi users of a value, instruction operands of a phi),
+// with the provisional id LRId combined with LIVE_RANGE_UNINITIALIZED.
+// Instructions that already carry a tag terminate the recursion.
+void DxilCleanup::CountLiveRangeRec(unsigned LRId, Instruction *pInst) {
+  const unsigned TaggedId = LRId | LIVE_RANGE_UNINITIALIZED;
+
+  auto itKnown = m_LiveRangeMap.find(pInst);
+  if (itKnown != m_LiveRangeMap.end()) {
+    // Already visited; it must have been reached as part of this same range.
+    DXASSERT_NOMSG(itKnown->second == TaggedId);
+    return;
+  }
+  m_LiveRangeMap[pInst] = TaggedId;
+
+  // Follow the value forward into every phi that consumes it.
+  for (User *pUser : pInst->users()) {
+    if (isa<PHINode>(pUser)) {
+      CountLiveRangeRec(LRId, cast<PHINode>(pUser));
+    }
+  }
+
+  // A phi also pulls in each of its incoming values that are instructions.
+  PHINode *pPhi = dyn_cast<PHINode>(pInst);
+  if (!pPhi)
+    return;
+  for (Value *pIncoming : pPhi->operands()) {
+    if (Instruction *pIncomingInst = dyn_cast<Instruction>(pIncoming)) {
+      CountLiveRangeRec(LRId, pIncomingInst);
+    }
+  }
+}
+
+// Recovery stage helper: adds pInst to LR.defs, clears its
+// LIVE_RANGE_UNINITIALIZED flag and walks the same phi connections as the
+// counting stage. While walking users it tallies how the value is consumed:
+// numU for DXIL bitcast users, numF/numI for any other non-phi user,
+// depending on whether the def is floating-point or integer typed.
+void DxilCleanup::RecoverLiveRangeRec(LiveRange &LR, Instruction *pInst) {
+  auto itEntry = m_LiveRangeMap.find(pInst);
+  DXASSERT_NOMSG(itEntry != m_LiveRangeMap.end());
+
+  const bool bAlreadyRecovered = (itEntry->second & LIVE_RANGE_UNINITIALIZED) == 0;
+  if (bAlreadyRecovered)
+    return;
+
+  itEntry->second &= ~LIVE_RANGE_UNINITIALIZED;
+  LR.defs.push_back(pInst);
+
+  Type *pDefType = pInst->getType();
+  for (User *pUser : pInst->users()) {
+    if (PHINode *pPhiUser = dyn_cast<PHINode>(pUser)) {
+      // Phi users belong to the same live range.
+      RecoverLiveRangeRec(LR, pPhiUser);
+      continue;
+    }
+
+    if (IsDxilBitcast(pUser)) {
+      LR.numU++;
+      continue;
+    }
+
+    // Ordinary user: it consumes the value with the def's current type.
+    if (pDefType->isFloatingPointTy()) {
+      LR.numF++;
+    } else if (pDefType->isIntegerTy()) {
+      LR.numI++;
+    } else {
+      DXASSERT_NOMSG(false);
+    }
+  }
+
+  PHINode *pPhi = dyn_cast<PHINode>(pInst);
+  if (!pPhi)
+    return;
+
+  // Incoming instruction values of a phi are part of the range as well;
+  // anything else is expected to be a constant.
+  for (Use &Incoming : pPhi->operands()) {
+    if (Instruction *pIncomingInst = dyn_cast<Instruction>(Incoming.get())) {
+      RecoverLiveRangeRec(LR, pIncomingInst);
+    } else {
+      DXASSERT_NOMSG(dyn_cast<Constant>(Incoming.get()));
+    }
+  }
+}
+
+// Classifies how settled the type of this live range is:
+//   1 - one of numI/numF strictly exceeds the other plus numU (type is known);
+//   2 - one of them exactly equals the other plus numU (type may change, but
+//       unlikely);
+//   3 - neither holds; the decision is postponed until more live ranges have
+//       types.
+unsigned DxilCleanup::LiveRange::GetCaseNumber() const {
+  const bool bIntWins = numI > (numF + numU);
+  const bool bFloatWins = numF > (numI + numU);
+  if (bIntWins || bFloatWins)
+    return 1;
+
+  const bool bIntTies = numI == (numF + numU);
+  const bool bFloatTies = numF == (numI + numU);
+  return (bIntTies || bFloatTies) ? 2 : 3;
+}
+
+// Picks the type this live range should have and stores it in pNewType.
+// The use counters vote for float or integer; on a tie the type of the first
+// def is kept. When the winner differs from the current def type, the
+// same-width counterpart is chosen (i16<->half, i32<->float, i64<->double).
+void DxilCleanup::LiveRange::GuessType(LLVMContext &Ctx) {
+  DXASSERT_NOMSG(pNewType == nullptr);
+
+  bool bWantFloat = false;
+  bool bWantInt = false;
+  if (numU != 0) {
+    if (numF >= numI + numU) {
+      bWantFloat = true;
+    } else if (numI >= numF + numU) {
+      bWantInt = true;
+    } else if (numF > numI) {
+      bWantFloat = true;
+    } else if (numI > numF) {
+      bWantInt = true;
+    }
+  } else {
+    // No bitcast users: a plain majority decides.
+    bWantFloat = numF > numI;
+    bWantInt = numI > numF;
+  }
+
+  Type *pCurType = (*defs.begin())->getType();
+  const bool bCurIsFloat = pCurType->isFloatingPointTy();
+  const bool bCurIsInt = pCurType->isIntegerTy();
+
+  // Undecided vote: stay with whatever the def already is.
+  if (!bWantFloat && !bWantInt) {
+    bWantFloat = bCurIsFloat;
+    bWantInt = bCurIsInt;
+  }
+
+  // Nothing to change when the vote agrees with the current type.
+  if ((bWantFloat && bCurIsFloat) || (bWantInt && bCurIsInt)) {
+    pNewType = pCurType;
+    return;
+  }
+
+  if (bWantFloat) {
+    // Integer def, float wanted.
+    if (pCurType == Type::getInt16Ty(Ctx)) {
+      pNewType = Type::getHalfTy(Ctx);
+    } else if (pCurType == Type::getInt32Ty(Ctx)) {
+      pNewType = Type::getFloatTy(Ctx);
+    } else if (pCurType == Type::getInt64Ty(Ctx)) {
+      pNewType = Type::getDoubleTy(Ctx);
+    } else {
+      DXASSERT_NOMSG(false);
+    }
+    return;
+  }
+
+  if (bWantInt) {
+    // Float def, integer wanted.
+    if (pCurType == Type::getHalfTy(Ctx)) {
+      pNewType = Type::getInt16Ty(Ctx);
+    } else if (pCurType == Type::getFloatTy(Ctx)) {
+      pNewType = Type::getInt32Ty(Ctx);
+    } else if (pCurType == Type::getDoubleTy(Ctx)) {
+      pNewType = Type::getInt64Ty(Ctx);
+    } else {
+      DXASSERT_NOMSG(false);
+    }
+    return;
+  }
+
+  DXASSERT_NOMSG(false);
+}
+
+// Ordering used to decide which live range gets its type assigned first.
+// Lower case numbers come first. Within cases 1 and 2 the range with the
+// larger max(numI, numF) comes first. Within case 3 the range with the larger
+// (numI + numF) / numU ratio comes first, then the one with the smaller numU.
+// Remaining ties are broken by id.
+bool DxilCleanup::LiveRange::operator<(const LiveRange &o) const {
+  const unsigned myCase = GetCaseNumber();
+  const unsigned otherCase = o.GetCaseNumber();
+  if (myCase != otherCase)
+    return myCase < otherCase;
+
+  if (myCase == 1 || myCase == 2) {
+    const unsigned myPeak = std::max(numI, numF);
+    const unsigned otherPeak = std::max(o.numI, o.numF);
+    if (myPeak != otherPeak)
+      return otherPeak < myPeak;
+  } else if (myCase == 3) {
+    const double myRatio = (double)(numI + numF) / (double)numU;
+    const double otherRatio = (double)(o.numI + o.numF) / (double)o.numU;
+    if (myRatio != otherRatio)
+      return otherRatio < myRatio;
+    if (numU != o.numU)
+      return numU < o.numU;
+  } else {
+    DXASSERT_NOMSG(false);
+  }
+
+  return id < o.id;
+}
+
+// Comparator over live range indices: orders two indices by comparing the
+// LiveRange objects they refer to in the referenced vector.
+struct LiveRangeLT {
+  LiveRangeLT(const vector<DxilCleanup::LiveRange> &LiveRanges) : m_LiveRanges(LiveRanges) {}
+
+  bool operator()(const unsigned i1, const unsigned i2) const {
+    return m_LiveRanges[i1] < m_LiveRanges[i2];
+  }
+
+private:
+  // Not owned; the vector must outlive the comparator.
+  const vector<DxilCleanup::LiveRange> &m_LiveRanges;
+};
+
+// Assigns a type to every live range, best-determined ranges first.
+// After a range is typed, each still-pending range connected to it through
+// bitcast edges gets the edge count added to its numF or numI counter, which
+// may move it forward in the ordering.
+void DxilCleanup::InferLiveRangeTypes() {
+  // Pending live range ids, kept sorted by LiveRange::operator<.
+  // TODO: Evaluate as candidate for optimization.
+  LiveRangeLT Order(m_LiveRanges);
+  set<unsigned, LiveRangeLT> Pending(Order);
+  for (LiveRange &LR : m_LiveRanges) {
+    Pending.insert(LR.id);
+  }
+
+  while (!Pending.empty()) {
+    // Take the front of the ordering out of the set before typing it.
+    auto itFront = Pending.begin();
+    LiveRange &Current = m_LiveRanges[*itFront];
+    Pending.erase(itFront);
+
+    Current.GuessType(*m_pCtx);
+
+    // Propagate the decision to neighboring live ranges that are still pending.
+    for (const auto &Edge : Current.bitcastMap) {
+      const unsigned neighborId = Edge.first;
+      const unsigned numLinks = Edge.second;
+
+      auto itNeighbor = Pending.find(neighborId);
+      if (itNeighbor == Pending.end())
+        continue;
+
+      // The counters feed the set's ordering, so the entry has to be removed
+      // while they change and re-inserted afterwards.
+      Pending.erase(itNeighbor);
+
+      LiveRange &Neighbor = m_LiveRanges[neighborId];
+      if (Current.pNewType->isFloatingPointTy()) {
+        Neighbor.numF += numLinks;
+      } else {
+        Neighbor.numI += numLinks;
+      }
+
+      Pending.insert(neighborId);
+    }
+  }
+}
+
+// Rewrites every live range whose inferred type (LR.pNewType) differs from the
+// type of its defs:
+//   a. each def gets a counterpart of the new type (a fresh phi for phi defs,
+//      a DXIL bitcast of the def otherwise);
+//   b. the new phis are wired up with the new-typed incoming values;
+//   c. every remaining user (neither phi nor DXIL bitcast) is redirected to a
+//      bitcast of the new def back to the original type.
+// The old defs are left in place; they are expected to be removed later as
+// dead code.
+void DxilCleanup::ChangeLiveRangeTypes() {
+  for (LiveRange &LR : m_LiveRanges) {
+    Type *pType = (*LR.defs.begin())->getType();
+    if (pType == LR.pNewType)
+      continue;
+
+    // Change live range type.
+    SmallDenseMap<Value *, Value *, 4> DefMap;
+    // a. Create new defs.
+    for (Value *D : LR.defs) {
+      Instruction *pInst = dyn_cast<Instruction>(D);
+      if (PHINode *phi = dyn_cast<PHINode>(pInst)) {
+        PHINode *pNewPhi = PHINode::Create(LR.pNewType, phi->getNumIncomingValues(), phi->getName(), phi->getNextNode());
+        DefMap[D] = pNewPhi;
+      } else {
+        DefMap[D] = CastValue(pInst, LR.pNewType, pInst);
+      }
+    }
+    // b. Fix phi uses.
+    for (Value *D : LR.defs) {
+      if (PHINode *phi = dyn_cast<PHINode>(D)) {
+        DXASSERT_NOMSG(DefMap.find(phi) != DefMap.end());
+        PHINode *pNewPhi = dyn_cast<PHINode>(DefMap[phi]);
+
+        for (unsigned i = 0; i < phi->getNumIncomingValues(); i++) {
+          Value *pVal = phi->getIncomingValue(i);
+          BasicBlock *BB = phi->getIncomingBlock(i);
+          Value *pNewVal = nullptr;
+          if (!isa<Constant>(pVal)) {
+            DXASSERT_NOMSG(DefMap.find(pVal) != DefMap.end());
+            pNewVal = DefMap[pVal];
+          } else {
+            // Constants are cast at the end of the incoming block.
+            pNewVal = CastValue(pVal, pNewPhi->getType(), BB->getTerminator());
+          }
+          pNewPhi->addIncoming(pNewVal, BB);
+        }
+      }
+    }
+    // c. Fix other uses.
+    for (Value *D : LR.defs) {
+      // Snapshot the users to rewrite before touching any of them:
+      // replaceUsesOfWith() relinks the affected Use onto the new value's use
+      // list, so walking D->users() while rewriting would cut the walk short
+      // and leave later users on the old def. A user that references D through
+      // several operands is listed once, since one replaceUsesOfWith() call
+      // rewrites all of its operands.
+      vector<User *> UsersToFix;
+      for (User *U : D->users()) {
+        if (isa<PHINode>(U) || IsDxilBitcast(U))
+          continue;
+        if (std::find(UsersToFix.begin(), UsersToFix.end(), U) == UsersToFix.end())
+          UsersToFix.push_back(U);
+      }
+
+      Instruction *pNewInst = dyn_cast<Instruction>(DefMap[D]);
+      for (User *U : UsersToFix) {
+        Value *pRevBitcast = CastValue(pNewInst, pType, pNewInst);
+        U->replaceUsesOfWith(D, pRevBitcast);
+
+        // If the new def is a phi we need to be careful about where we place the bitcast.
+        // For phis we need to place the bitcast after all the phi defs for the block.
+        if (isa<PHINode>(pNewInst) && isa<Instruction>(pRevBitcast) && pRevBitcast != pNewInst) {
+          PHINode *pPhi = cast<PHINode>(pNewInst);
+          Instruction *pInst = cast<Instruction>(pRevBitcast);
+          pInst->removeFromParent();
+          pInst->insertBefore(pPhi->getParent()->getFirstInsertionPt());
+        }
+      }
+    }
+  }
+}
+
+// If I1 matches DxilBitcast1 and its source value is an instruction matching
+// DxilBitcast2, all uses of I1 are replaced with the source of that inner
+// bitcast. Returns true whenever I1 matches DxilBitcast1, even if no
+// replacement happened; returns false otherwise.
+template<typename DxilBitcast1, typename DxilBitcast2>
+static bool CleanupBitcastPattern(Instruction *I1) {
+  DxilBitcast1 Outer(I1);
+  if (!Outer)
+    return false;
+
+  if (Instruction *pInnerInst = dyn_cast<Instruction>(Outer.get_value())) {
+    DxilBitcast2 Inner(pInnerInst);
+    if (Inner) {
+      I1->replaceAllUsesWith(Inner.get_value());
+    }
+  }
+  return true;
+}
+
+// Walks every instruction of the module and short-circuits three redundant
+// patterns by redirecting the uses of the pattern's last instruction. Nothing
+// is erased here; the bypassed instructions are only left without users.
+void DxilCleanup::CleanupPatterns() {
+  Type *pI32Ty = Type::getInt32Ty(*m_pCtx);
+  Type *pI1Ty = Type::getInt1Ty(*m_pCtx);
+
+  for (Function &Fn : *m_pModule) {
+    for (BasicBlock &Block : Fn) {
+      for (Instruction &Inst : Block) {
+        Instruction *I1 = &Inst;
+
+        // Cleanup i1 pattern:
+        // %1 = icmp eq i32 %0, 1
+        // %2 = sext i1 %1 to i32
+        // %3 = icmp ne i32 %2, 0
+        // br i1 %3, ...
+        //
+        // becomes
+        // ...
+        // br i1 %1, ...
+        //
+        if (ICmpInst *pICmp = dyn_cast<ICmpInst>(I1)) {
+          if (pICmp->getPredicate() != CmpInst::Predicate::ICMP_NE)
+            continue;
+
+          Value *pLhs = pICmp->getOperand(0);
+          if (pLhs->getType() != pI32Ty)
+            continue;
+
+          // Normalize so that a constant first operand ends up on the right.
+          Value *pRhs = pICmp->getOperand(1);
+          if (dyn_cast<ConstantInt>(pLhs))
+            std::swap(pLhs, pRhs);
+
+          ConstantInt *pZero = dyn_cast<ConstantInt>(pRhs);
+          if (!pZero || pZero->getZExtValue() != 0)
+            continue;
+
+          SExtInst *pSExt = dyn_cast<SExtInst>(pLhs);
+          DXASSERT_NOMSG(!pSExt || pSExt->getType() == pI32Ty);
+          if (!pSExt || pSExt->getSrcTy() != pI1Ty)
+            continue;
+
+          // (sext i1 %x) != 0 is %x itself.
+          I1->replaceAllUsesWith(pSExt->getOperand(0));
+          continue;
+        }
+
+        // Cleanup chains of bitcasts:
+        // %1 = call float @dx.op.bitcastI32toF32(i32 126, i32 %0)
+        // %2 = call i32 @dx.op.bitcastF32toI32(i32 127, float %1)
+        // %3 = iadd i32 %2, ...
+        //
+        // becomes
+        // ...
+        // %3 = iadd i32 %0, ...
+        //
+        // The checks run in order and stop at the first bitcast kind that
+        // matches I1.
+        if (CleanupBitcastPattern<DxilInst_BitcastI32toF32, DxilInst_BitcastF32toI32>(I1) ||
+            CleanupBitcastPattern<DxilInst_BitcastF32toI32, DxilInst_BitcastI32toF32>(I1) ||
+            CleanupBitcastPattern<DxilInst_BitcastI16toF16, DxilInst_BitcastF16toI16>(I1) ||
+            CleanupBitcastPattern<DxilInst_BitcastF16toI16, DxilInst_BitcastI16toF16>(I1) ||
+            CleanupBitcastPattern<DxilInst_BitcastI64toF64, DxilInst_BitcastF64toI64>(I1) ||
+            CleanupBitcastPattern<DxilInst_BitcastF64toI64, DxilInst_BitcastI64toF64>(I1)) {
+          continue;
+        }
+
+        // Cleanup chains of doubles:
+        // %7 = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double %6)
+        // %8 = extractvalue %dx.types.splitdouble %7, 0
+        // %9 = extractvalue %dx.types.splitdouble %7, 1
+        // ...
+        // %15 = call double @dx.op.makeDouble.f64(i32 101, i32 %8, i32 %9)
+        // %16 = call double @dx.op.binary.f64(i32 36, double %15, double 0x3FFC51EB80000000)
+        //
+        // becomes (%15 -> %6)
+        // ...
+        // %16 = call double @dx.op.binary.f64(i32 36, double %6, double 0x3FFC51EB80000000)
+        //
+        DxilInst_MakeDouble MD(I1);
+        if (!MD)
+          continue;
+
+        // Both halves must be single-index extracts from the same aggregate:
+        // element 1 feeding hi and element 0 feeding lo.
+        ExtractValueInst *pHi = dyn_cast<ExtractValueInst>(MD.get_hi());
+        ExtractValueInst *pLo = dyn_cast<ExtractValueInst>(MD.get_lo());
+        if (!pHi || !pLo)
+          continue;
+        if (pHi->getAggregateOperand() != pLo->getAggregateOperand())
+          continue;
+        if (pHi->getNumIndices() != 1 || pLo->getNumIndices() != 1)
+          continue;
+        if (*pHi->idx_begin() != 1 || *pLo->idx_begin() != 0)
+          continue;
+
+        Instruction *pSDInst = dyn_cast<Instruction>(pHi->getAggregateOperand());
+        if (!pSDInst)
+          continue;
+
+        DxilInst_SplitDouble SD(pSDInst);
+        if (SD) {
+          I1->replaceAllUsesWith(SD.get_value());
+        }
+      }
+    }
+  }
+}
+
+// Runs LLVM's dead code elimination pass over the whole module. In debug
+// builds of this file (DXILCLEANUP_DBG) the module is verified first.
+void DxilCleanup::RemoveDeadCode() {
+#if DXILCLEANUP_DBG
+  DXASSERT_NOMSG(!verifyModule(*m_pModule));
+#endif
+
+  PassManager DcePipeline;
+  DcePipeline.add(createDeadCodeEliminationPass());
+  DcePipeline.run(*m_pModule);
+}
+
+// Returns pValue reinterpreted as pToType.
+//
+// If the types already match, pValue itself is returned. Otherwise a call to
+// the matching DXIL bitcast operation is created; only the same-width pairs
+// half<->i16, float<->i32 and double<->i64 are supported, anything else
+// fails with DXC_E_OPTIMIZATION_FAILED.
+//
+// When pValue is an instruction the call is inserted right after it; for any
+// other value (e.g. a constant) it is inserted before pOrigInst.
+Value *DxilCleanup::CastValue(Value *pValue, Type *pToType, Instruction *pOrigInst) {
+  Type *pFromType = pValue->getType();
+  if (pFromType == pToType)
+    return pValue;
+
+  Type *pI16Ty = Type::getInt16Ty(*m_pCtx);
+  Type *pI32Ty = Type::getInt32Ty(*m_pCtx);
+  Type *pI64Ty = Type::getInt64Ty(*m_pCtx);
+  Type *pF16Ty = Type::getHalfTy(*m_pCtx);
+  Type *pF32Ty = Type::getFloatTy(*m_pCtx);
+  Type *pF64Ty = Type::getDoubleTy(*m_pCtx);
+
+  // Select the bitcast opcode from the source type, together with the only
+  // destination type that opcode can produce.
+  DXIL::OpCode OpCode;
+  Type *pExpectedToType = nullptr;
+  if (pFromType == pF32Ty) {
+    OpCode = DXIL::OpCode::BitcastF32toI32;
+    pExpectedToType = pI32Ty;
+  } else if (pFromType == pI32Ty) {
+    OpCode = DXIL::OpCode::BitcastI32toF32;
+    pExpectedToType = pF32Ty;
+  } else if (pFromType == pF16Ty) {
+    OpCode = DXIL::OpCode::BitcastF16toI16;
+    pExpectedToType = pI16Ty;
+  } else if (pFromType == pI16Ty) {
+    OpCode = DXIL::OpCode::BitcastI16toF16;
+    pExpectedToType = pF16Ty;
+  } else if (pFromType == pF64Ty) {
+    OpCode = DXIL::OpCode::BitcastF64toI64;
+    pExpectedToType = pI64Ty;
+  } else if (pFromType == pI64Ty) {
+    OpCode = DXIL::OpCode::BitcastI64toF64;
+    pExpectedToType = pF64Ty;
+  } else {
+    IFT(DXC_E_OPTIMIZATION_FAILED);
+  }
+  IFTBOOL(pToType == pExpectedToType, DXC_E_OPTIMIZATION_FAILED);
+
+  // Look up the operation's function in the module, declaring it on first use.
+  // Its signature is: pToType (i32 opcode, pFromType value).
+  std::string funcName = (Twine("dx.op.") + Twine(OP::GetOpCodeClassName(OpCode))).str();
+  Function *F = m_pModule->getFunction(funcName);
+  if (!F) {
+    Type *ParamTypes[] = { pI32Ty, pFromType };
+    FunctionType *pFT = FunctionType::get(pToType, ArrayRef<Type*>(ParamTypes, 2), false);
+    F = Function::Create(pFT, GlobalValue::LinkageTypes::ExternalLinkage, funcName, m_pModule);
+    F->setCallingConv(CallingConv::C);
+    F->addFnAttr(Attribute::NoUnwind);
+    F->addFnAttr(Attribute::ReadNone);
+  }
+
+  // Build the call: the opcode as an i32 constant, then the value to cast.
+  Value *CallArgs[] = {
+    Constant::getIntegerValue(IntegerType::get(*m_pCtx, 32), APInt(32, (int)OpCode)),
+    pValue
+  };
+
+  Instruction *pInsertBefore = pOrigInst;
+  if (Instruction *pDefInst = dyn_cast<Instruction>(pValue)) {
+    pInsertBefore = pDefInst->getNextNode();
+  }
+
+  CallInst *pBitcast = CallInst::Create(F, ArrayRef<Value*>(CallArgs, 2), "", pInsertBefore);
+  return pBitcast;
+}
+
+// Returns true when pValue is a call to one of the six DXIL bitcast
+// operations (F16/F32/F64 to the same-width integer and back).
+bool DxilCleanup::IsDxilBitcast(Value *pValue) {
+  Instruction *pInst = dyn_cast<Instruction>(pValue);
+  if (!pInst || !OP::IsDxilOpFuncCallInst(pInst))
+    return false;
+
+  switch (OP::GetDxilOpFuncCallInst(pInst)) {
+  case OP::OpCode::BitcastF16toI16:
+  case OP::OpCode::BitcastF32toI32:
+  case OP::OpCode::BitcastF64toI64:
+  case OP::OpCode::BitcastI16toF16:
+  case OP::OpCode::BitcastI32toF32:
+  case OP::OpCode::BitcastI64toF64:
+    return true;
+  default:
+    return false;
+  }
+}
+
+} // namespace DxilCleanupNS
+
+
+using namespace DxilCleanupNS;
+
+// Publicly exposed interface to pass...
+// DxilCleanupID aliases the pass's static ID member so that code outside this
+// file can refer to the pass without seeing the DxilCleanup class.
+char &llvm::DxilCleanupID = DxilCleanup::ID;
+
+
+// Pass registration; "dxil-cleanup" is the pass argument string. The empty
+// BEGIN/END pair declares no dependencies on other passes.
+INITIALIZE_PASS_BEGIN(DxilCleanup, "dxil-cleanup", "Optimize DXIL after conversion from DXBC", true, false)
+INITIALIZE_PASS_END  (DxilCleanup, "dxil-cleanup", "Optimize DXIL after conversion from DXBC", true, false)
+
+namespace llvm {
+
+// Factory for the DXIL cleanup pass. Returns a newly allocated pass object;
+// presumably ownership passes to the pass manager it is added to.
+ModulePass *createDxilCleanupPass() {
+  return new DxilCleanup();
+}
+
+}

+ 43 - 0
projects/dxilconv/lib/DxilConvPasses/InitializePasses.cpp

@@ -0,0 +1,43 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// InitializePasses.cpp                                                      //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Initialization of transformation passes used in DirectX DXBC to DXIL      //
+// converter.                                                                //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "DxilConvPasses/ScopeNestInfo.h"
+#include "DxilConvPasses/ScopeNestedCFG.h"
+#include "DxilConvPasses/NormalizeDxil.h"
+#include "DxilConvPasses/DxilCleanup.h"
+#include "dxc/Support/WinIncludes.h"
+#include "dxc/Support/Global.h"
+
+#include "llvm/PassRegistry.h"
+
+using namespace llvm;
+
+
+// Place to put our private pass initialization for opt.exe.
+// Registers the four dxilconv passes (ScopeNestedCFG, ScopeNestInfo wrapper,
+// NormalizeDxil and DxilCleanup) with the given registry.
+void __cdecl initializeDxilConvPasses(PassRegistry &Registry) {
+  initializeScopeNestedCFGPass(Registry);
+  initializeScopeNestInfoWrapperPassPass(Registry);
+  initializeNormalizeDxilPassPass(Registry);
+  initializeDxilCleanupPass(Registry);
+}
+
+namespace hlsl {
+// Registers the dxilconv passes with the process-wide LLVM pass registry.
+// Returns S_OK on success. Exceptions raised during registration are handled
+// by CATCH_CPP_RETURN_HRESULT, which presumably converts them into a failure
+// HRESULT returned to the caller.
+HRESULT SetupRegistryPassForDxilConvPasses() {
+  try
+  {
+    PassRegistry &Registry = *PassRegistry::getPassRegistry();
+    initializeDxilConvPasses(Registry);
+  }
+  CATCH_CPP_RETURN_HRESULT();
+  return S_OK;
+}
+}

+ 182 - 0
projects/dxilconv/lib/DxilConvPasses/NormalizeDxil.cpp

@@ -0,0 +1,182 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// NormalizeDxil.cpp                                                         //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Normalize DXIL transformation.                                            //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "DxilConvPasses/NormalizeDxil.h"
+#include "dxc/Support/Global.h"
+
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Dominators.h"
+
+#include "dxc/DXIL/DXILOperations.h"
+#include "dxc/DXIL/DxilInstructions.h"
+
+#include <vector>
+
+using namespace llvm;
+
+//----------------------- Normalize Implementation ------------------------//
+
+// Look for resource handles that were moved to the stack by reg2mem and
+// move them back to registers.
+//
+// We make this change so the dxil will have an actual resource handle as 
+// the argument to a load/store resource instruction instead of being
+// indirected through the stack.
+class NormalizeResourceHandle {
+public:
+  // Processes one function; returns true if at least one alloca was replaced.
+  bool Run(Function &F, DominatorTree &DT);
+private:
+  // An alloca holding a resource handle, paired with the create handle
+  // instruction whose result is stored into it.
+  struct ResourceHandleCandidate {
+    Instruction *Alloca;
+    Instruction *CreateHandle;
+  };
+  // Returns the create handle instruction stored to allocaInst when the alloca
+  // qualifies for replacement, nullptr otherwise.
+  Instruction *IsResourceHandleAllocaCandidate(BasicBlock *entryBlock, AllocaInst *allocaInst, DominatorTree &DT);
+  // Collects the qualifying allocas from the leading run of allocas in the
+  // entry block.
+  void FindCandidates(BasicBlock &entry, std::vector<ResourceHandleCandidate> &candidates, DominatorTree &DT);
+  // Redirects the users of each candidate's loads to the create handle
+  // instruction and queues the loads, stores and allocas in trash.
+  void ReplaceResourceHandleUsage(const std::vector<ResourceHandleCandidate> &candidates, std::vector<Instruction*> &trash);
+  // Erases every queued instruction and empties the queue.
+  void Cleanup(std::vector<Instruction *> &trash);
+};
+
+// Check to see if this is a valid resource handle location for replacement:
+// 1. Only used in load/store.
+// 2. Stored to at least once, and every store writes the same value.
+// 3. Store value is create handle inst.
+// 4. Create handle dominates all uses of alloca.
+//
+// The check is strict to limit the replacement candidates to those allocas that
+// were inserted by reg2mem and make the replacement trivial.
+//
+// Returns the create handle instruction stored to the alloca when all of the
+// conditions hold, nullptr otherwise. entryBlock is not consulted.
+Instruction *NormalizeResourceHandle::IsResourceHandleAllocaCandidate(BasicBlock *entryBlock, AllocaInst *allocaInst, DominatorTree &DT) {
+  Instruction *createHandleInst = nullptr;
+  Instruction *const NOT_A_CANDIDATE = nullptr;
+
+  for (User *use : allocaInst->users()) {
+    if (StoreInst *store = dyn_cast<StoreInst>(use)) {
+      if (store->getPointerOperand() != allocaInst) // In case it is used in gep expression.
+        return NOT_A_CANDIDATE;
+
+      Instruction *storedValue = dyn_cast<Instruction>(store->getValueOperand());
+      if (!storedValue)
+        return NOT_A_CANDIDATE;
+
+      hlsl::DxilInst_CreateHandle createHandle(storedValue);
+      if (!createHandle)
+        return NOT_A_CANDIDATE;
+
+      // A second store is tolerated only if it writes the very same handle.
+      if (createHandleInst && createHandleInst != storedValue)
+        return NOT_A_CANDIDATE;
+
+      createHandleInst = storedValue;
+    }
+    else if (!(isa<LoadInst>(use))) {
+      return NOT_A_CANDIDATE;
+    }
+  }
+
+  // An alloca that is never stored to (for example one that is only loaded)
+  // has no handle to forward; bail out before querying dominance with a null
+  // definition.
+  if (!createHandleInst)
+    return NOT_A_CANDIDATE;
+
+  for (Use &use : allocaInst->uses()) {
+      if (!DT.dominates(createHandleInst, use))
+          return NOT_A_CANDIDATE;
+  }
+
+  return createHandleInst;
+}
+
+// Scans the run of allocas at the top of the entry block and records every one
+// that qualifies as a resource handle slot, together with the create handle
+// instruction stored into it. Scanning stops at the first non-alloca.
+void NormalizeResourceHandle::FindCandidates(BasicBlock &BBEntry, std::vector<ResourceHandleCandidate> &candidates, DominatorTree &DT) {
+  // The block must be terminated, so the scan always hits a non-alloca.
+  DXASSERT_NOMSG(BBEntry.getTerminator());
+
+  for (BasicBlock::iterator itInst = BBEntry.begin(); isa<AllocaInst>(itInst); ++itInst) {
+    Instruction *createHandle = IsResourceHandleAllocaCandidate(&BBEntry, cast<AllocaInst>(itInst), DT);
+    if (!createHandle)
+      continue;
+
+    candidates.push_back({ itInst, createHandle });
+  }
+}
+
+// For every candidate, makes the users of each load from the alloca use the
+// create handle instruction directly. The loads, the stores and finally the
+// alloca itself are appended to trash for later erasure.
+void NormalizeResourceHandle::ReplaceResourceHandleUsage(const std::vector<ResourceHandleCandidate> &candidates, std::vector<Instruction *> &trash) {
+  for (const ResourceHandleCandidate &entry : candidates) {
+    Instruction *slot = entry.Alloca;
+
+    for (User *user : slot->users()) {
+      if (isa<StoreInst>(user)) {
+        // The store becomes dead once the loads are gone.
+        trash.push_back(cast<StoreInst>(user));
+        continue;
+      }
+
+      LoadInst *load = dyn_cast<LoadInst>(user);
+      if (!load) {
+        DXASSERT(false, "should only have load and store insts");
+        continue;
+      }
+
+      load->replaceAllUsesWith(entry.CreateHandle);
+      trash.push_back(load);
+    }
+
+    // Queued after its users so that it is erased after them.
+    trash.push_back(slot);
+  }
+}
+
+// Erases the queued instructions from their parent blocks, in queue order,
+// and leaves the queue empty.
+void NormalizeResourceHandle::Cleanup(std::vector<Instruction*> &trash) {
+  for (size_t i = 0; i < trash.size(); ++i) {
+    trash[i]->eraseFromParent();
+  }
+
+  trash.clear();
+}
+
+// Finds the resource handle allocas in the function's entry block, forwards
+// the stored handles to the users of the loads and erases the now-dead
+// instructions. Returns true if any candidate was found.
+bool NormalizeResourceHandle::Run(Function &function, DominatorTree &DT) {
+  std::vector<ResourceHandleCandidate> found;
+  FindCandidates(function.getEntryBlock(), found, DT);
+
+  std::vector<Instruction *> doomed;
+  ReplaceResourceHandleUsage(found, doomed);
+  Cleanup(doomed);
+
+  return !found.empty();
+}
+
+// Driver for the normalization steps applied to a single function. It holds
+// references to the function and its dominator tree; both must outlive it.
+class NormalizeDxil {
+public:
+  NormalizeDxil(Function &F, DominatorTree &DT) : m_function(F), m_dominatorTree(DT) {}
+
+  // Runs the normalization; returns true if the function was changed.
+  bool Run();
+
+private:
+  Function &m_function;
+  DominatorTree &m_dominatorTree;
+};
+
+
+// Currently the only normalization step is resource handle normalization.
+bool NormalizeDxil::Run() {
+  return NormalizeResourceHandle().Run(m_function, m_dominatorTree);
+}
+
+
+//----------------------- Pass Implementation ------------------------//
+char NormalizeDxilPass::ID = 0;
+// Pass registration; "normalizedxil" is the pass argument string.
+INITIALIZE_PASS_BEGIN(NormalizeDxilPass, "normalizedxil", "Normalize dxil pass", false, false)
+INITIALIZE_PASS_END(NormalizeDxilPass, "normalizedxil", "Normalize dxil pass", false, false)
+
+
+// Factory for the normalize pass; returns a newly allocated pass object.
+FunctionPass *llvm::createNormalizeDxilPass() {
+  return new NormalizeDxilPass();
+}
+
+// Normalizes F using the dominator tree computed for it; returns true if the
+// function was modified.
+bool NormalizeDxilPass::runOnFunction(Function &F) {
+  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  NormalizeDxil normalizer(F, DT);
+  return normalizer.Run();
+}
+
+// The pass needs the dominator tree and declares that it preserves the CFG.
+void NormalizeDxilPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  AU.addRequired<DominatorTreeWrapperPass>();
+}

+ 100 - 0
projects/dxilconv/lib/DxilConvPasses/ScopeNestInfo.cpp

@@ -0,0 +1,100 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// ScopeNestInfo.cpp                                                         //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Implements ScopeNestInfo class to hold the results of the scope           //
+// nest analysis.                                                            //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "DxilConvPasses/ScopeNestInfo.h"
+#include "DxilConvPasses/ScopeNestIterator.h"
+
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+
+//----------------------- Scope Nest Info Implementation ---------------------//
+// Writes the recorded scope nest events to out as an indented listing.
+// Begin-scope events print their block name (if any) followed by "@<type>" and
+// increase the indentation for what follows; end-scope events decrease it and
+// print "@<type>" followed by the block name. Other events print the block
+// name and, except for Body events, their "@<type>" tag; If_Else and
+// Switch_Case tags are printed one level out.
+void ScopeNestInfo::print(raw_ostream &out) const
+{
+    out << "ScopeNestInfo:\n";
+
+    int depth = 0;
+    for (const ScopeNestEvent &event : m_scopeElements) {
+        const bool opensScope = event.IsBeginScope();
+        const bool closesScope = event.IsEndScope();
+
+        // Both helpers read the current depth at the time they are called.
+        auto printBlockName = [&]() {
+            if (event.Block)
+                indent(out, depth, event.Block->getName()) << "\n";
+        };
+        auto printTypeTag = [&](int level) {
+            indent(out, level, "@") << event.GetElementTypeName() << "\n";
+        };
+
+        // A closing event is printed at the depth of its opening event.
+        if (closesScope)
+            --depth;
+
+        if (opensScope) {
+            printBlockName();
+            printTypeTag(depth);
+        }
+        else if (closesScope) {
+            printTypeTag(depth);
+            printBlockName();
+        }
+        else {
+            printBlockName();
+
+            switch (event.ElementType) {
+            case ScopeNestEvent::Type::If_Else:
+            case ScopeNestEvent::Type::Switch_Case:
+                printTypeTag(depth - 1);
+                break;
+            case ScopeNestEvent::Type::Body:
+                break;
+            default:
+                printTypeTag(depth);
+                break;
+            }
+        }
+
+        if (opensScope)
+            ++depth;
+    }
+}
+
+// Writes four spaces per level (nothing for level <= 0) followed by str, and
+// returns the stream so that callers can keep appending.
+raw_ostream &ScopeNestInfo::indent(raw_ostream &out, int level, StringRef str) const {
+    int remaining = level;
+    while (remaining-- > 0)
+        out << "    ";
+    return out << str;
+}
+
+// Drops all recorded scope nest events.
+void ScopeNestInfo::releaseMemory() {
+    m_scopeElements.clear();
+}
+
+// Walks F with ScopeNestIterator and appends every event it yields to
+// m_scopeElements. Previously recorded events are not cleared here.
+void ScopeNestInfo::Analyze(Function &F) {
+    ScopeNestIterator it = ScopeNestIterator::begin(F);
+    ScopeNestIterator stop = ScopeNestIterator::end();
+    while (it != stop) {
+        m_scopeElements.push_back(*it);
+        ++it;
+    }
+}
+
+//----------------------- Wrapper Pass Implementation ------------------------//
+char ScopeNestInfoWrapperPass::ID = 0;
+// Pass registration; "scopenestinfo" is the pass argument string.
+INITIALIZE_PASS_BEGIN(ScopeNestInfoWrapperPass, "scopenestinfo", "Scope nest info pass", true, true)
+INITIALIZE_PASS_END(ScopeNestInfoWrapperPass,   "scopenestinfo",   "Scope nest info pass", true, true)
+
+
+// Factory for the wrapper pass; returns a newly allocated pass object.
+FunctionPass *llvm::createScopeNestInfoWrapperPass() {
+    return new ScopeNestInfoWrapperPass();
+}
+
+// Recomputes the scope nest information for F, discarding any earlier result.
+// Always returns false: the function itself is not modified.
+bool ScopeNestInfoWrapperPass::runOnFunction(Function &F) { 
+    releaseMemory();
+    SI.Analyze(F);
+    return false;
+}
+
+// Discards the stored analysis result.
+void ScopeNestInfoWrapperPass::releaseMemory() {
+    SI.releaseMemory();
+}
+
+// Prints the stored analysis result; the module argument is not used.
+void ScopeNestInfoWrapperPass::print(raw_ostream &O, const Module *M) const {
+    SI.print(O);
+}
+
+// Pure analysis: declares that every other analysis is preserved.
+void ScopeNestInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesAll();
+}

+ 1872 - 0
projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp

@@ -0,0 +1,1872 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// ScopeNestedCFG.cpp                                                        //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Implements the ScopeNested CFG Transformation.                            //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "DxilConvPasses/ScopeNestedCFG.h"
+#include "llvm/Analysis/ReducibilityAnalysis.h"
+#include "dxc/Support/Global.h"
+
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Pass.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/BitVector.h"
+
+#include <vector>
+#include <unordered_map>
+#include <unordered_set>
+#include <set>
+#include <algorithm>
+
+using namespace llvm;
+using std::unique_ptr;
+using std::shared_ptr;
+using std::pair;
+using std::vector;
+using std::unordered_map;
+using std::unordered_set;
+using std::set;
+
+#define SNCFG_DBG   0
+
+//===----------------------------------------------------------------------===//
+//                    ScopeNested CFG Transformation
+//===----------------------------------------------------------------------===//
+//
+// The transformation requires the following LLVM passes:
+// -simplifycfg -loop-simplify -reg2mem_hlsl to be run on each function.
+// This is to rely on LLVM standard loop analysis info and to be able to clone 
+// basic blocks, if necessary.
+//
+// The core of the algorithm is the transformation of an acyclic CFG region into 
+// a region that corresponds to control-flow with structured nested scopes.
+// Scoping information is conveyed by inserting helper basic blocks (BBs) and
+// annotating their terminators with the corresponding "dx.BranchKind" metadata
+// (see BranchKind enum in ScopeNestedCFG.h) to make it possible for clients
+// to recover the structure after the pass.
+// 
+// To handle loops, the algorithm transforms each loop nest from the deepest 
+// nested loop upwards. Each transformed loop is conceptually treated as a single loop node,
+// defined by LoopEntry and LoopExit (if there is an exit) BB pair.
+// A loop is made an acyclic region by "removing" its backedge.
+// The process finishes with transforming function body starting from the entry basic block (BB).
+//
+// Transforming an acyclic region.
+// 1. Topological ordering is done by DFS graph traversal.
+//    - Each BB is assigned an ID
+//    - For each BB, a set of all reachable BBs is computed.
+// 2. Using topological block order, reachable merge points are propagated along predecessors,
+//    and for each split point (if, switch), the closest merge point is determined, by intersecting 
+//    reachable merge point sets of the successor BBs. A switch uses a heuristic that picks
+//    the closest merge point reachable via majority of successors.
+// 3. The CFG is transformed to have scope-nested structure. Here are some interesting details:
+//    - A custom scope-stack is used to recover scopes.
+//    - The transformation operates on the original CFG, with its original structure preserved
+//      during transformation until the very last moment.
+//      Cloned BBs are inserted into the CFG and their terminators temporarily form self-loops.
+//      The implementation maintains a set of edges to instantiate as the final step, which
+//      destroys the original CFG.
+//    - Loops are treated as a single loop node identified via two BBs: LoopBegin->LoopEnd.
+//      There is a subroutine to clone an entire loop, if there is a need.
+//    - The branches are annotated with dx.BranchKind.
+//    - For a switch scope, the transformation identifies switch breaks, and then recomputes merge points
+//      for scopes nested inside the switch scope.
+//
+
+namespace ScopeNestedCFGNS {
+
+class ScopeNestedCFG : public FunctionPass { // Function pass implementing the ScopeNested CFG transformation.
+public:
+  static char ID; // Pass ID storage; defined after the class body.
+
+  explicit ScopeNestedCFG() 
+  : FunctionPass(ID)
+  , m_HelperExitCondIndex(0) {
+    Clear(); // Put every per-function member into its null/empty state.
+    initializeScopeNestedCFGPass(*PassRegistry::getPassRegistry());
+  }
+
+  virtual bool runOnFunction(Function &F); // Transforms F; the definition in this file always returns true.
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const { // Lists the analyses this pass requires.
+    AU.addRequiredID(ReducibilityAnalysisID);
+    AU.addRequired<LoopInfoWrapperPass>(); // runOnFunction obtains LoopInfo from this pass.
+  }
+
+private:
+  struct LoopItem { // Key blocks recorded for one loop during phase 1 of runOnFunction.
+    BasicBlock *pLB;  // Loop begin
+    BasicBlock *pLE;  // Loop end
+    BasicBlock *pLP;  // Loop preheader
+    BasicBlock *pLL;  // Loop latch
+
+    LoopItem() { pLB = pLE = pLP = pLL = nullptr; } // All blocks start out unset.
+  };
+
+  LLVMContext *m_pCtx; // Context of the function being processed.
+  Module *m_pModule; // Parent module of the function being processed.
+  Function *m_pFunc; // Function being processed.
+  LoopInfo *m_pLI; // Loop info obtained from LoopInfoWrapperPass.
+  unsigned m_HelperExitCondIndex; // Counter handed out by GetHelperExitCondIndex().
+  BasicBlock *m_pLoopHeader; // Header of the loop region being transformed; nullptr for the entry region.
+  vector<Loop *> m_Loops; // Loops in the order CollectLoopsRec appends them (nested loops before their parent).
+  unordered_map<BasicBlock*, LoopItem> m_LoopMap; // Keyed by loop begin block (LoopItem::pLB).
+  unordered_map<BasicBlock*, BasicBlock*> m_LE2LBMap; // Maps loop end block to loop begin block.
+
+  void Clear(); // Resets all members above to nullptr / 0 / empty.
+
+  //
+  // Preliminary CFG transformations and related utilities.
+  //
+  void SanitizeBranches(); // Runs SanitizeBranchesRec starting at the function's first block.
+  void SanitizeBranchesRec(BasicBlock *pBB, unordered_set<BasicBlock *> &VisitedBB);
+  void CollectUniqueSuccessors(const BasicBlock *pBB,
+                               const BasicBlock *pSuccessorToExclude,
+                               vector<BasicBlock *> &Successors);
+
+  //
+  // Loop region transformations.
+  //
+  void CollectLoopsRec(Loop *pLoop); // Appends sub-loops (recursively) and then pLoop to m_Loops.
+
+  void AnnotateBranch(BasicBlock *pBB, BranchKind Kind); // Sets "dx.BranchKind" metadata on pBB's terminator.
+  BranchKind GetBranchAnnotation(const BasicBlock *pBB); // Reads "dx.BranchKind"; BranchKind::Invalid when absent.
+  void RemoveBranchAnnotation(BasicBlock *pBB); // Clears "dx.BranchKind" on pBB's terminator.
+
+  void GetUniqueExitBlocks(const SmallVectorImpl<Loop::Edge> &ExitEdges, SmallVectorImpl<BasicBlock *> &ExitBlocks);
+  bool IsLoopBackedge(BasicBlock *pNode); // True when pNode is annotated BranchKind::LoopBackEdge.
+  bool IsAcyclicRegionTerminator(const BasicBlock *pBB); // True for returns and loop break/continue/backedge blocks.
+
+  BasicBlock *GetEffectiveNodeToFollowSuccessor(BasicBlock *pBB);
+  bool IsMergePoint(BasicBlock *pBB); // True when pBB has at least two distinct non-backedge predecessors.
+
+  BasicBlock *SplitEdge(BasicBlock *pBB, unsigned SuccIdx, const Twine &Name, Loop *pLoop, BasicBlock *pToInsertBB);
+  BasicBlock *SplitEdge(BasicBlock *pBB, BasicBlock *pSucc, const Twine &Name, Loop *pLoop, BasicBlock *pToInsertBB);
+  /// Ensure that the latch node terminates by an unconditional branch. Return the latch node.
+  BasicBlock *SanitizeLoopLatch(Loop *pLoop);
+  unsigned GetHelperExitCondIndex() { return m_HelperExitCondIndex++; } // Returns the current counter, then increments it.
+  /// Ensure that loop has either single exit or no exits. Return the exit node or nullptr.
+  BasicBlock *SanitizeLoopExits(Loop *pLoop);
+  void SanitizeLoopContinues(Loop *pLoop); // Splits every edge into the latch with a "dx.LoopContinue" block.
+  void AnnotateLoopBranches(Loop *pLoop, LoopItem *pLI);
+
+  //
+  // BasicBlock topological order and reachability sets for acyclic region.
+  //
+  class BlockTopologicalOrderAndReachability {
+  public:
+    void AppendBlock(BasicBlock *pBB, unique_ptr<BitVector> ReachableBBs);
+    unsigned GetNumBlocks() const;
+    BasicBlock *GetBlock(unsigned Id) const;
+    unsigned GetBlockId(BasicBlock *pBB) const;
+    BitVector *GetReachableBBs(BasicBlock *pBB) const;
+    BitVector *GetReachableBBs(unsigned Id) const;
+
+    void dump(raw_ostream &OS) const;
+
+  private:
+    struct BasicBlockState { // One entry per appended block.
+      BasicBlock *pBB;
+      unique_ptr<BitVector> ReachableBBs; // Owned by this entry (moved in by the constructor).
+      BasicBlockState(BasicBlock *p, unique_ptr<BitVector> bv) : pBB(p), ReachableBBs(std::move(bv)) {}
+    };
+    vector<BasicBlockState> m_BlockState;
+    unordered_map<BasicBlock *, unsigned> m_BlockIdMap;
+  };
+  void ComputeBlockTopologicalOrderAndReachability(BasicBlock *pEntry, BlockTopologicalOrderAndReachability &BTO);
+  void ComputeBlockTopologicalOrderAndReachabilityRec(BasicBlock *pNode,
+                                                      BlockTopologicalOrderAndReachability &BTO,
+                                                      unordered_map<BasicBlock *, unsigned> &Marks);
+
+  //
+  // Recovery of scope end points.
+  //
+  struct MergePointInfo {
+    unsigned MP;  // Index of the merge point, if known.
+    set<unsigned> CandidateSet;
+  };
+  using MergePointsMap = unordered_map<BasicBlock *, unique_ptr<MergePointInfo> >;
+  using ScopeEndPointsMap = unordered_map<BasicBlock *, BasicBlock *>;
+  using SwitchBreaksMap = unordered_map<BasicBlock *, BasicBlock *>;
+
+  void DetermineScopeEndPoints(BasicBlock *pEntry,
+                               bool bRecomputeSwitchScope,
+                               const BlockTopologicalOrderAndReachability &BTO,
+                               const SwitchBreaksMap &SwitchBreaks,
+                               ScopeEndPointsMap &ScopeEndPoints,
+                               ScopeEndPointsMap &DeltaScopeEndPoints);
+  void DetermineReachableMergePoints(BasicBlock *pEntry,
+                                     BasicBlock *pExit,
+                                     bool bRecomputeSwitchScope,
+                                     const BitVector *pReachableBBs,
+                                     const BlockTopologicalOrderAndReachability &BTO,
+                                     const SwitchBreaksMap &SwitchBreaks,
+                                     const ScopeEndPointsMap &OldScopeEndPoints,
+                                     MergePointsMap &MergePoints);
+  void DetermineSwitchBreaks(BasicBlock *pSwitchBegin,
+                             const ScopeEndPointsMap &ScopeEndPoints,
+                             const BlockTopologicalOrderAndReachability &BTO,
+                             SwitchBreaksMap &SwitchBreaks);
+
+  //
+  // Transformation of acyclic region.
+  //
+  void TransformAcyclicRegion(BasicBlock *pEntry); // Called per loop header and once for the function's first block.
+
+  // Scope stack.
+  struct ScopeStackItem {
+    enum class Kind {
+      Invalid = 0,
+      Return,
+      Fallthrough,
+      If,
+      Switch,
+    };
+
+    Kind ScopeKind;
+
+    BasicBlock *pScopeBeginBB;
+    BasicBlock *pClonedScopeBeginBB;
+    BasicBlock *pScopeEndBB;
+    BasicBlock *pClonedScopeEndBB;
+
+    unsigned SuccIdx;
+    BasicBlock *pPrevSuccBB;
+    BasicBlock *pClonedPrevSuccBB;
+
+    shared_ptr<ScopeEndPointsMap> ScopeEndPoints;
+    bool bRestoreIfScopeEndPoint;
+    shared_ptr<ScopeEndPointsMap> DeltaScopeEndPoints;
+    shared_ptr<SwitchBreaksMap> SwitchBreaks;
+
+    ScopeStackItem() // Kind::Invalid, null block pointers, SuccIdx 0, flag false; shared_ptr members default-construct empty.
+    : ScopeKind(Kind::Invalid)
+    , pScopeBeginBB(nullptr)
+    , pClonedScopeBeginBB(nullptr)
+    , pScopeEndBB(nullptr)
+    , pClonedScopeEndBB(nullptr)
+    , SuccIdx(0)
+    , pPrevSuccBB(nullptr)
+    , pClonedPrevSuccBB(nullptr)
+    , bRestoreIfScopeEndPoint(false)
+    {}
+  };
+  vector<ScopeStackItem> m_ScopeStack;
+
+  ScopeStackItem &PushScope(BasicBlock *pBB);
+  ScopeStackItem &RePushScope(const ScopeStackItem &Scope);
+  ScopeStackItem *GetScope(unsigned Idx = 0);
+  ScopeStackItem *FindParentScope(ScopeStackItem::Kind ScopeKind);
+  void PopScope();
+
+  // Cloning.
+  void AddEdge(BasicBlock *pClonedSrcBB, unsigned SuccSlotIdx, BasicBlock *pDstBB,
+               unordered_map<BasicBlock *, vector<BasicBlock *> > &Edges);
+  BasicBlock *CloneBasicBlockAndFixupValues(const BasicBlock *pBB,
+                                            ValueToValueMapTy &RegionValueRemap,
+                                            const Twine &NameSuffix = "");
+  BasicBlock *CloneNode(BasicBlock *pBB, 
+                        unordered_map<BasicBlock *, vector<BasicBlock *> > &BlockClones,
+                        ValueToValueMapTy &RegionValueRemap);
+  BasicBlock *CloneLoop(BasicBlock *pHeaderBB,
+                        BasicBlock *pClonedPreHeaderBB,
+                        unordered_map<BasicBlock *, vector<BasicBlock *> > &BlockClones,
+                        unordered_map<BasicBlock *, vector<BasicBlock *> > &Edges,
+                        ValueToValueMapTy &RegionValueRemap);
+  BasicBlock *CloneLoopRec(BasicBlock *pBB,
+                    BasicBlock *pClonedPredBB,
+                    unsigned ClonedPredIdx,
+                    unordered_map<BasicBlock *, vector<BasicBlock *> > &BlockClones,
+                    unordered_map<BasicBlock *, vector<BasicBlock *> > &Edges,
+                    unordered_set<BasicBlock *> &VisitedBlocks,
+                    const LoopItem &LI,
+                    LoopItem &ClonedLI,
+                    ValueToValueMapTy &RegionValueRemap);
+
+  //
+  // Utility functions.
+  //
+  bool IsIf(BasicBlock *pBB);
+  bool IsIf(TerminatorInst *pTI);
+  bool IsSwitch(BasicBlock *pBB);
+  bool IsSwitch(TerminatorInst *pTI);
+  Value *GetFalse();
+  Value *GetTrue();
+  ConstantInt *GetI32Const(int v);
+  void DumpIntSet(raw_ostream &s, set<unsigned> Set); // NOTE(review): Set is taken by value, so it is copied on every call.
+};
+
+char ScopeNestedCFG::ID = 0; // Definition of the pass's static ID member (passed to FunctionPass in the constructor).
+
+
+bool ScopeNestedCFG::runOnFunction(Function &F) { // Pass entry point: sanitizes branches, prepares every loop, then transforms each loop region and finally the entry region.
+#if SNCFG_DBG
+  dbgs() << "ScopeNestedCFG: processing function " << F.getName();
+#endif
+  Clear(); // Drop state left over from a previously processed function.
+
+  m_pCtx = &F.getContext();
+  m_pModule = F.getParent();
+  m_pFunc = &F;
+  m_pLI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+
+  // Sanitize branches.
+  SanitizeBranches();
+
+  // Collect loops innermost to outermost.
+  for (auto itLoop = m_pLI->begin(), endLoop = m_pLI->end(); itLoop != endLoop; ++itLoop) {
+    Loop *pLoop = *itLoop;
+    CollectLoopsRec(pLoop); // Appends nested loops before pLoop itself.
+  }
+
+  //
+  // Phase 1:
+  //   - verify, analyze and prepare loop shape
+  //   - record loop information
+  //   - classify and annotate loop branches
+  //
+  for (size_t iLoop = 0; iLoop < m_Loops.size(); iLoop++) {
+    Loop *pLoop = m_Loops[iLoop];
+    BasicBlock *pPreHeader = pLoop->getLoopPreheader();
+    BasicBlock *pHeader = pLoop->getHeader();
+    BasicBlock *pLatch = pLoop->getLoopLatch(); // Replaced below by the result of SanitizeLoopLatch.
+    BasicBlock *pExit = nullptr;
+
+    // Make sure there is preheader.
+    IFTBOOL(pPreHeader != nullptr, DXC_E_SCOPE_NESTED_FAILED);
+
+    // Make sure there is a single backedge.
+    IFTBOOL(pLoop->getNumBackEdges() == 1, DXC_E_SCOPE_NESTED_FAILED);
+
+    // Prepare loop latch.
+    pLatch = SanitizeLoopLatch(pLoop);
+
+    // Prepare exits and breaks.
+    pExit = SanitizeLoopExits(pLoop); // nullptr when the loop has no exit blocks.
+
+    // Prepare continues.
+    SanitizeLoopContinues(pLoop);
+
+    // Record essential loop information.
+    LoopItem LI;
+    LI.pLB = pHeader;
+    LI.pLE = pExit;
+    LI.pLL = pLatch;
+    LI.pLP = pPreHeader;
+    DXASSERT_NOMSG(m_LoopMap.find(LI.pLB) == m_LoopMap.end());
+    m_LoopMap[LI.pLB] = LI;
+    if (LI.pLE != nullptr) {
+      DXASSERT_NOMSG(m_LE2LBMap.find(LI.pLE) == m_LE2LBMap.end());
+      m_LE2LBMap[LI.pLE] = LI.pLB; // Exit-to-header lookup exists only for loops that have an exit.
+    }
+
+    // Annotate known branches for the loop.
+    AnnotateLoopBranches(pLoop, &LI);
+  }
+
+  //
+  // Phase 2:
+  //   - for each loop from most inner:
+  //     + "remove" backedge
+  //     + transform acyclic region
+  //   - transform entry region
+  //
+  for (size_t iLoop = 0; iLoop < m_Loops.size(); iLoop++) {
+    Loop *pLoop = m_Loops[iLoop];
+    BasicBlock *pHeader = pLoop->getHeader();
+    LoopItem LI = m_LoopMap[pHeader];
+    BasicBlock *pLatch = LI.pLL;
+    DXASSERT_LOCALVAR_NOMSG(pLatch, pLatch->getTerminator()->getNumSuccessors() == 1 && pLatch->getTerminator()->getSuccessor(0) == pHeader);
+
+    m_pLoopHeader = pHeader; // IsLoopBackedge asserts that backedges target this header.
+
+    TransformAcyclicRegion(pHeader);
+  }
+
+  m_pLoopHeader = nullptr; // The entry region is not inside any loop.
+  TransformAcyclicRegion(F.begin());
+
+  return true; // Always reports the function as changed.
+}
+
+// Returns the pass object to its pristine state: loop bookkeeping containers
+// are emptied, the helper index restarts at 0, and all cached pointers to the
+// current context/module/function/loop info/loop header are nulled.
+void ScopeNestedCFG::Clear() {
+  // Loop bookkeeping.
+  m_LE2LBMap.clear();
+  m_LoopMap.clear();
+  m_Loops.clear();
+  m_pLoopHeader = nullptr;
+  m_HelperExitCondIndex = 0;
+
+  // Cached per-function handles.
+  m_pLI = nullptr;
+  m_pFunc = nullptr;
+  m_pModule = nullptr;
+  m_pCtx = nullptr;
+}
+
+
+//-----------------------------------------------------------------------------
+// Preliminary CFG transformations and related utilities.
+//-----------------------------------------------------------------------------
+// Sanitizes the terminators of all blocks reachable from the entry block of
+// the current function by running the recursive helper with an empty
+// visited set.
+void ScopeNestedCFG::SanitizeBranches() {
+  unordered_set<BasicBlock *> Seen;
+  BasicBlock *pEntryBB = &m_pFunc->getEntryBlock();
+  SanitizeBranchesRec(pEntryBB, Seen);
+}
+
+void ScopeNestedCFG::SanitizeBranchesRec(BasicBlock *pBB, unordered_set<BasicBlock *> &VisitedBB) { // Normalizes pBB's terminator, then recurses into its successors (depth-first, each block once).
+  // Mark pBB as visited, and return if pBB already has been visited.
+  if (!VisitedBB.emplace(pBB).second)
+    return;
+
+  // Sanitize branch.
+  if (BranchInst *I = dyn_cast<BranchInst>(pBB->getTerminator())) {
+    // a. Convert a conditional branch to unconditional, if successors are the same.
+    if (I->isConditional()) {
+      BasicBlock *pSucc1 = I->getSuccessor(0);
+      BasicBlock *pSucc2 = I->getSuccessor(1);
+      if (pSucc1 == pSucc2) {
+        BranchInst::Create(pSucc1, I); // New unconditional branch is inserted before the old one...
+        I->eraseFromParent(); // ...which is then removed, leaving the new branch as terminator.
+      }
+    }
+  }
+  else if (SwitchInst *I = dyn_cast<SwitchInst>(pBB->getTerminator())) {
+    // b. Group switch successors.
+    struct SwitchCaseGroup { // All case values that jump to one successor block.
+      BasicBlock *pSuccBB;
+      vector<ConstantInt *> CaseValue;
+    };
+    vector<SwitchCaseGroup> SwitchCaseGroups; // Groups in order of first appearance of their successor.
+    unordered_map<BasicBlock *, unsigned> BB2GroupIdMap; // Successor block -> index into SwitchCaseGroups.
+    BasicBlock *pDefaultBB = I->getDefaultDest();
+
+    for (SwitchInst::CaseIt itCase = I->case_begin(), endCase = I->case_end(); itCase != endCase; ++itCase) {
+      BasicBlock *pSuccBB = itCase.getCaseSuccessor();
+      ConstantInt *pCaseValue = itCase.getCaseValue();
+
+      if (pSuccBB == pDefaultBB) {
+        // Assimilate this case label into default label.
+        continue;
+      }
+
+      auto itGroup = BB2GroupIdMap.insert( {pSuccBB, SwitchCaseGroups.size()} );
+      if (itGroup.second) {
+        SwitchCaseGroups.emplace_back(SwitchCaseGroup{}); // First time this successor is seen: open a new group.
+      }
+
+      SwitchCaseGroup &G = SwitchCaseGroups[itGroup.first->second];
+      G.pSuccBB = pSuccBB;
+      G.CaseValue.emplace_back(pCaseValue);
+    }
+
+    if (SwitchCaseGroups.size() == 0) {
+      // All case labels were assimilated into the default label.
+      // Replace switch with an unconditional branch.
+      BranchInst::Create(pDefaultBB, I);
+      I->eraseFromParent();
+    } else {
+      // Rewrite switch instruction such that case labels are grouped by the successor.
+      unsigned CaseIdx = 0; // Next case slot to overwrite; ends up equal to the number of kept cases.
+      for (const SwitchCaseGroup &G : SwitchCaseGroups) {
+        for (ConstantInt *pCaseValue : G.CaseValue) {
+          SwitchInst::CaseIt itCase(I, CaseIdx++);
+          itCase.setSuccessor(G.pSuccBB);
+          itCase.setValue(pCaseValue);
+        }
+      }
+      // Remove unused case labels.
+      for (unsigned NumCases = I->getNumCases(); CaseIdx < NumCases; NumCases--) {
+        I->removeCase(SwitchInst::CaseIt{I, NumCases-1}); // Always drops the current last case, so kept slots are untouched.
+      }
+    }
+  }
+
+  // Recurse, visiting each successor group once.
+  TerminatorInst * pTI = pBB->getTerminator(); // Re-read: the terminator may have been replaced above.
+  BasicBlock *pPrevSuccBB = nullptr;
+  for (unsigned i = 0; i < pTI->getNumSuccessors(); i++) {
+    BasicBlock *pSuccBB = pTI->getSuccessor(i);
+    if (pSuccBB != pPrevSuccBB) { // Skips only consecutive repeats; the visited set covers the rest.
+      SanitizeBranchesRec(pSuccBB, VisitedBB);
+    }
+    pPrevSuccBB = pSuccBB;
+  }
+}
+
+// Appends the successors of pBB to Successors (which must be empty on entry),
+// in terminator order. A successor identical to the one in the immediately
+// preceding slot is skipped, and pSuccessorToExclude is never appended.
+void ScopeNestedCFG::CollectUniqueSuccessors(const BasicBlock *pBB,
+                                             const BasicBlock *pSuccessorToExclude,
+                                             vector<BasicBlock *> &Successors) {
+  DXASSERT_NOMSG(Successors.empty());
+  const TerminatorInst *pTerm = pBB->getTerminator();
+  BasicBlock *pLastSeenBB = nullptr;
+  for (unsigned Slot = 0, NumSlots = pTerm->getNumSuccessors(); Slot != NumSlots; ++Slot) {
+    BasicBlock *pCurBB = pTerm->getSuccessor(Slot);
+
+    // Same target as the previous slot: part of the same successor group.
+    if (pCurBB == pLastSeenBB)
+      continue;
+    pLastSeenBB = pCurBB;
+
+    if (pCurBB == pSuccessorToExclude)
+      continue;
+
+    Successors.emplace_back(pCurBB);
+  }
+}
+
+
+//-----------------------------------------------------------------------------
+// Loop region transformations.
+//-----------------------------------------------------------------------------
+// Post-order walk of the loop nest rooted at pLoop: every sub-loop (and its
+// own sub-loops) is appended to m_Loops before pLoop itself.
+void ScopeNestedCFG::CollectLoopsRec(Loop *pLoop) {
+  for (Loop *pSubLoop : *pLoop)
+    CollectLoopsRec(pSubLoop);
+
+  m_Loops.emplace_back(pLoop);
+}
+
+// Attaches "dx.BranchKind" metadata carrying Kind (as an i32 constant) to the
+// terminator of pBB, replacing any annotation already present.
+//
+// In assert-enabled builds this checks that the terminator is a branch or a
+// switch, and that an existing annotation is either unchanged or only flips
+// between IfBegin and IfNoEnd.
+void ScopeNestedCFG::AnnotateBranch(BasicBlock *pBB, BranchKind Kind) {
+  TerminatorInst *pTI = pBB->getTerminator();
+  DXASSERT(isa<BranchInst>(pTI) || isa<SwitchInst>(pTI), "annotate only branch and switch terminators");
+
+  // Check that we are not changing the annotation.
+  if (MDNode *pMD = pTI->getMetadata("dx.BranchKind")) {
+    // The annotation is always written below as a single ConstantInt operand,
+    // so use cast<> (which asserts on a type mismatch) rather than
+    // dereferencing an unchecked dyn_cast<> result.
+    ConstantAsMetadata *pKindMD = cast<ConstantAsMetadata>(pMD->getOperand(0));
+    ConstantInt *pVal = cast<ConstantInt>(pKindMD->getValue());
+    BranchKind OldKind = (BranchKind)pVal->getZExtValue();
+    DXASSERT_LOCALVAR(OldKind, OldKind == Kind ||
+             (OldKind == BranchKind::IfBegin && Kind == BranchKind::IfNoEnd) ||
+             (OldKind == BranchKind::IfNoEnd && Kind == BranchKind::IfBegin),
+             "the algorithm should not be changing branch types implicitly (unless it is an if)");
+  }
+
+  pTI->setMetadata("dx.BranchKind", MDNode::get(*m_pCtx, ConstantAsMetadata::get(GetI32Const((int)Kind))));
+}
+
+// Returns the BranchKind stored in the "dx.BranchKind" metadata of pBB's
+// terminator, or BranchKind::Invalid when the terminator is not annotated.
+BranchKind ScopeNestedCFG::GetBranchAnnotation(const BasicBlock *pBB) {
+  const TerminatorInst *pTI = pBB->getTerminator();
+  MDNode *pMD = pTI->getMetadata("dx.BranchKind");
+  if (pMD == nullptr)
+    return BranchKind::Invalid;
+
+  // AnnotateBranch stores the kind as a single ConstantInt operand; cast<>
+  // asserts that shape instead of dereferencing an unchecked dyn_cast<>.
+  ConstantAsMetadata *pKindMD = cast<ConstantAsMetadata>(pMD->getOperand(0));
+  ConstantInt *pVal = cast<ConstantInt>(pKindMD->getValue());
+  return (BranchKind)pVal->getZExtValue();
+}
+
+// Clears the "dx.BranchKind" metadata from the terminator of pBB.
+void ScopeNestedCFG::RemoveBranchAnnotation(BasicBlock *pBB) {
+  pBB->getTerminator()->setMetadata("dx.BranchKind", nullptr);
+}
+
+// Fills ExitBlocks (which must be empty on entry) with the target block of
+// each exit edge, keeping only the first occurrence of every block and
+// preserving the order in which the edges are listed.
+void ScopeNestedCFG::GetUniqueExitBlocks(const SmallVectorImpl<Loop::Edge> &ExitEdges,
+                                         SmallVectorImpl<BasicBlock *> &ExitBlocks) {
+  DXASSERT_NOMSG(ExitBlocks.empty());
+  unordered_set<BasicBlock *> SeenTargets;
+  for (const Loop::Edge &Edge : ExitEdges) {
+    BasicBlock *pTargetBB = const_cast<BasicBlock *>(Edge.second);
+    // insert() reports whether the block was newly added to the set.
+    if (SeenTargets.insert(pTargetBB).second)
+      ExitBlocks.push_back(pTargetBB);
+  }
+}
+
+// Returns true when pNode's terminator is annotated as a loop backedge.
+// Assert-enabled builds additionally check that such a block has a single
+// successor and that it is the current loop header (m_pLoopHeader).
+bool ScopeNestedCFG::IsLoopBackedge(BasicBlock *pNode) {
+  if (GetBranchAnnotation(pNode) != BranchKind::LoopBackEdge)
+    return false;
+
+  DXASSERT_NOMSG(pNode->getTerminator()->getNumSuccessors() == 1);
+  DXASSERT_NOMSG(pNode->getTerminator()->getSuccessor(0) == m_pLoopHeader);
+  return true;
+}
+
+// Picks the block whose successors should be followed in place of pBB's:
+//   - LoopNoEnd annotation: nullptr (there is nothing to follow);
+//   - LoopBegin annotation: the recorded loop end block (LoopItem::pLE) of
+//     the loop whose header is pBB's single successor;
+//   - anything else: pBB itself.
+BasicBlock *ScopeNestedCFG::GetEffectiveNodeToFollowSuccessor(BasicBlock *pBB) {
+  const BranchKind Kind = GetBranchAnnotation(pBB);
+
+  if (Kind == BranchKind::LoopNoEnd)
+    return nullptr;
+
+  if (Kind != BranchKind::LoopBegin)
+    return pBB;
+
+  // pBB is the preheader of a recorded loop; look the loop up by its header.
+  TerminatorInst *pPreHeaderTI = pBB->getTerminator();
+  DXASSERT_NOMSG(pPreHeaderTI->getNumSuccessors() == 1);
+  BasicBlock *pHeaderBB = pPreHeaderTI->getSuccessor(0);
+  auto itEntry = m_LoopMap.find(pHeaderBB);
+  DXASSERT_NOMSG(itEntry != m_LoopMap.end());
+  const LoopItem &Item = itEntry->second;
+  DXASSERT_NOMSG(Item.pLB == pHeaderBB && Item.pLP == pBB);
+  DXASSERT_NOMSG(Item.pLE->getTerminator()->getNumSuccessors() == 1);
+  return Item.pLE;
+}
+
+// Returns true when pBB has two or more distinct predecessor blocks, not
+// counting predecessors that are annotated as loop backedges.
+bool ScopeNestedCFG::IsMergePoint(BasicBlock *pBB) {
+  unordered_set<BasicBlock *> DistinctPreds;
+  for (auto itPred = pred_begin(pBB), endPred = pred_end(pBB); itPred != endPred; ++itPred) {
+    BasicBlock *pCandidateBB = *itPred;
+    if (!IsLoopBackedge(pCandidateBB))
+      DistinctPreds.insert(pCandidateBB);
+  }
+
+  return !(DistinctPreds.size() < 2);
+}
+
+// Returns true when pNode ends an acyclic region: its terminator is a return,
+// or it is annotated as a loop break, loop continue, or loop backedge.
+bool ScopeNestedCFG::IsAcyclicRegionTerminator(const BasicBlock *pNode) {
+  if (isa<ReturnInst>(pNode->getTerminator()))
+    return true;
+
+  const BranchKind Kind = GetBranchAnnotation(pNode);
+  return Kind == BranchKind::LoopBreak ||
+         Kind == BranchKind::LoopContinue ||
+         Kind == BranchKind::LoopBackEdge;
+}
+
+
+// Convenience overload: resolves the successor slot of pBB that targets pSucc
+// via GetSuccessorNumber and forwards to the index-based SplitEdge.
+BasicBlock *ScopeNestedCFG::SplitEdge(BasicBlock *pBB, BasicBlock *pSucc, const Twine &Name, Loop *pLoop, BasicBlock *pToInsertBB) {
+  return SplitEdge(pBB, GetSuccessorNumber(pBB, pSucc), Name, pLoop, pToInsertBB);
+}
+
+// Places a block on the edge leaving pBB through successor slot SuccIdx and
+// returns that block.
+//   - pToInsertBB == nullptr: a new block called Name is created right after
+//     pBB in the function and given an unconditional branch to the old target.
+//   - otherwise: pToInsertBB is used; its single-successor branch is retargeted
+//     to the old target.
+// When pLoop is non-null the placed block is also added to that loop.
+BasicBlock *ScopeNestedCFG::SplitEdge(BasicBlock *pBB, unsigned SuccIdx, const Twine &Name, Loop *pLoop, BasicBlock *pToInsertBB) {
+  const bool bCreateNew = (pToInsertBB == nullptr);
+  BasicBlock *pMidBB = bCreateNew
+                           ? BasicBlock::Create(*m_pCtx, Name, m_pFunc, pBB->getNextNode())
+                           : pToInsertBB;
+
+  if (pLoop != nullptr)
+    pLoop->addBasicBlockToLoop(pMidBB, *m_pLI);
+
+  // Redirect the chosen slot of pBB to the middle block.
+  TerminatorInst *pSrcTI = pBB->getTerminator();
+  BasicBlock *pOldTargetBB = pSrcTI->getSuccessor(SuccIdx);
+  pSrcTI->setSuccessor(SuccIdx, pMidBB);
+
+  // Connect the middle block to the original target.
+  if (bCreateNew) {
+    BranchInst::Create(pOldTargetBB, pMidBB);
+    return pMidBB;
+  }
+
+  TerminatorInst *pMidTI = pMidBB->getTerminator();
+  DXASSERT_NOMSG(dyn_cast<BranchInst>(pMidTI) != nullptr && pMidTI->getNumSuccessors() == 1);
+  pMidTI->setSuccessor(0, pOldTargetBB);
+  return pMidBB;
+}
+
+// Returns a latch block for pLoop that contains nothing but a
+// single-successor branch. If the existing latch already has that shape it is
+// returned unchanged; otherwise the latch->header edge is split with a new
+// "dx.LoopLatch" block (added to pLoop), and that block is returned.
+BasicBlock *ScopeNestedCFG::SanitizeLoopLatch(Loop *pLoop) {
+  BasicBlock *pLatch = pLoop->getLoopLatch();
+  TerminatorInst *pTI = pLatch->getTerminator();
+  DXASSERT_NOMSG(pTI->getNumSuccessors() != 0 && dyn_cast<ReturnInst>(pTI) == nullptr);
+
+  // A latch is already in the required shape when it is an empty block that
+  // ends in a 'br' with one successor.
+  const bool bLatchIsClean = isa<BranchInst>(pTI) &&
+                             (&*pLatch->begin()) == pTI &&
+                             pTI->getNumSuccessors() <= 1;
+  if (bLatchIsClean)
+    return pLatch;
+
+  return SplitEdge(pLatch, pLoop->getHeader(), "dx.LoopLatch", pLoop, nullptr);
+}
+
+// Reshapes the exits of pLoop so that the loop leaves through one block.
+//
+// Returns nullptr when the loop has no exit blocks. Otherwise:
+//   1. A "dx.LoopExit" block is created, plus one "dx.LoopExitHelper" block
+//      per original exit block; in-loop edges to an original exit block are
+//      redirected to its helper, and every helper branches to dx.LoopExit.
+//   2. dx.LoopExit continues to the last original exit block. For each of the
+//      other exits an i1 guard (alloca in the function's first block) is set
+//      to false in the preheader and to true in the helper, and a
+//      "dx.LoopExitHelperIf" block after dx.LoopExit branches on the guard to
+//      the corresponding original exit block.
+//   3. Helpers are cloned so that each has a single predecessor block.
+// New blocks are added to the parent loop, if there is one.
+// Returns the dx.LoopExit block.
+BasicBlock *ScopeNestedCFG::SanitizeLoopExits(Loop *pLoop) {
+  Loop *pOuterLoop = pLoop->getParentLoop();
+  BasicBlock *pPreHeader = pLoop->getLoopPreheader();
+  BasicBlock *pLatch = pLoop->getLoopLatch();
+
+  SmallVector<Loop::Edge, 8> ExitEdges;
+  pLoop->getExitEdges(ExitEdges);
+  SmallVector<BasicBlock *, 8> OldExitBBs;
+  GetUniqueExitBlocks(ExitEdges, OldExitBBs);
+
+  if (OldExitBBs.empty()) {
+    // A loop without breaks.
+    return nullptr;
+  }
+
+  // Create the loop exit BB.
+  BasicBlock *pExit = BasicBlock::Create(*m_pCtx, "dx.LoopExit", m_pFunc, pLatch->getNextNode());
+  if (pOuterLoop != nullptr) {
+    pOuterLoop->addBasicBlockToLoop(pExit, *m_pLI);
+  }
+
+  // Create helper exit blocks.
+  SmallVector<BasicBlock *, 8> HelperExitBBs;
+  for (size_t iExitBB = 0; iExitBB < OldExitBBs.size(); iExitBB++) {
+    BasicBlock *pOldExit = OldExitBBs[iExitBB];
+    BasicBlock *pNewExit = BasicBlock::Create(*m_pCtx, "dx.LoopExitHelper", m_pFunc, pLatch->getNextNode());
+    HelperExitBBs.push_back(pNewExit);
+
+    if (pOuterLoop != nullptr) {
+      pOuterLoop->addBasicBlockToLoop(pNewExit, *m_pLI);
+    }
+
+    // Adjust exit edges.
+    // Snapshot the predecessors first: redirecting edges changes the
+    // predecessor list being iterated.
+    SmallVector<BasicBlock *, 8> OldExitPredBBs;
+    for (auto itPred = pred_begin(pOldExit), endPred = pred_end(pOldExit); itPred != endPred; ++itPred) {
+      OldExitPredBBs.push_back(*itPred);
+    }
+    for (size_t PredIdx = 0; PredIdx < OldExitPredBBs.size(); PredIdx++) {
+      BasicBlock *pOldExitPred = OldExitPredBBs[PredIdx];
+
+      if (pLoop->contains(pOldExitPred)) {
+        unsigned PredSuccIdx = GetSuccessorNumber(pOldExitPred, pOldExit);
+        pOldExitPred->getTerminator()->setSuccessor(PredSuccIdx, pNewExit);
+      }
+    }
+    DXASSERT_NOMSG(pred_begin(pNewExit) != pred_end(pNewExit));
+
+    // Connect helper exit to the loop exit node.
+    BranchInst::Create(pExit, pNewExit);
+  }
+  DXASSERT_NOMSG(HelperExitBBs.size() == OldExitBBs.size());
+
+  // Fix up conditions for the rest of execution.
+  unsigned NumExits = HelperExitBBs.size();
+  BasicBlock *pRestOfExecutionBB = OldExitBBs.back();
+  BranchInst::Create(pRestOfExecutionBB, pExit);
+  for (unsigned i = 0; i < NumExits - 1; i++) {
+    // Walk the remaining exits from the second-to-last down to the first.
+    unsigned ExitIdx = NumExits - 2 - i;
+    BasicBlock *pExitHelper = HelperExitBBs[ExitIdx];
+    BasicBlock *pOldExit = OldExitBBs[ExitIdx];
+
+    // Declare helper-exit guard variable.
+    AllocaInst *pAI = new AllocaInst(Type::getInt1Ty(*m_pCtx), 
+                                     Twine("dx.LoopExitHelperCond"),
+                                     m_pFunc->begin()->begin());
+
+    // Initialize the guard to 'false' before the loop.
+    new StoreInst(GetFalse(), pAI, pPreHeader->getTerminator());
+
+    // Assign the guard to 'true' in exit helper.
+    new StoreInst(GetTrue(), pAI, pExitHelper->begin());
+
+    // Insert an 'if' to conditionally guard exit execution.
+    BasicBlock *pIfBB = BasicBlock::Create(*m_pCtx, "dx.LoopExitHelperIf", m_pFunc, pExit->getNextNode());
+    if (pOuterLoop != nullptr) {
+      pOuterLoop->addBasicBlockToLoop(pIfBB, *m_pLI);
+    }
+    LoadInst *pLoadCondI = new LoadInst(pAI);
+    (void)BranchInst::Create(pOldExit, pRestOfExecutionBB, pLoadCondI, pIfBB);
+    pIfBB->getInstList().insert(pIfBB->begin(), pLoadCondI);
+
+    // Adjust rest-of-computation point.
+    pExit->getTerminator()->setSuccessor(0, pIfBB);
+    pRestOfExecutionBB = pIfBB;
+  }
+
+  // Duplicate helper exit nodes such that each has unique predecessor.
+  for (size_t iHelperBB = 0; iHelperBB < HelperExitBBs.size(); iHelperBB++) {
+    BasicBlock *pHelperBB = HelperExitBBs[iHelperBB];
+    // Collect unique predecessors.
+    SmallVector<BasicBlock *, 8> PredBBs;
+    unordered_set<BasicBlock *> UniquePredBBs;
+    for (auto itPred = pred_begin(pHelperBB), endPred = pred_end(pHelperBB); itPred != endPred; ++itPred) {
+      BasicBlock *pPredBB = *itPred;
+      auto P = UniquePredBBs.insert(pPredBB);
+      if (P.second) {
+        PredBBs.push_back(pPredBB);
+      }
+    }
+    // Duplicate helper node.
+    // The first predecessor keeps the original helper; every other
+    // predecessor gets its own clone.
+    BasicBlock *pInsertionBB = PredBBs[0];
+    for (size_t iSrc = 1; iSrc < PredBBs.size(); iSrc++) {
+      BasicBlock *pPredBB = PredBBs[iSrc];
+      ValueToValueMapTy EmptyRemap;
+      BasicBlock *pClone = CloneBasicBlockAndFixupValues(pHelperBB, EmptyRemap);
+      if (pOuterLoop != nullptr) {
+        pOuterLoop->addBasicBlockToLoop(pClone, *m_pLI);
+      }
+      // Redirect predecessor successors.
+      // A predecessor may reach the helper through several successor slots
+      // (e.g. a switch with multiple case labels); redirect all of them.
+      TerminatorInst *pPredTI = pPredBB->getTerminator();
+      bool bRedirected = false;
+      for (unsigned PredSuccIdx = 0; PredSuccIdx < pPredTI->getNumSuccessors(); PredSuccIdx++) {
+        if (pPredTI->getSuccessor(PredSuccIdx) != pHelperBB)
+          continue;
+
+        pPredTI->setSuccessor(PredSuccIdx, pClone);
+        bRedirected = true;
+      }
+      if (!bRedirected)
+        continue;
+
+      // Update LoopInfo.
+      if (pOuterLoop != nullptr && !pOuterLoop->contains(pExit)) {
+        pOuterLoop->addBasicBlockToLoop(pExit, *m_pLI);
+      }
+      // Insert into function. This is done once per clone, outside the slot
+      // loop, so a predecessor with several redirected slots does not insert
+      // the same block into the function's block list more than once.
+      m_pFunc->getBasicBlockList().insertAfter(pInsertionBB, pClone);
+      pInsertionBB = pClone;
+    }
+  }
+
+  return pExit;
+}
+
+// Splits every edge entering the latch of pLoop with a fresh
+// "dx.LoopContinue" block (added to pLoop), so that each path into the latch
+// passes through its own helper block. The latch is expected to be an empty
+// block ending in a single-successor branch.
+void ScopeNestedCFG::SanitizeLoopContinues(Loop *pLoop) {
+  BasicBlock *pLatch = pLoop->getLoopLatch();
+  TerminatorInst *pLatchTI = pLatch->getTerminator();
+  DXASSERT_LOCALVAR_NOMSG(pLatchTI, dyn_cast<BranchInst>(pLatchTI) != nullptr && pLatchTI->getNumSuccessors() == 1 && (&*pLatch->begin()) == pLatchTI);
+
+  // Snapshot the latch predecessors up front: splitting edges below changes
+  // the predecessor list.
+  SmallVector<BasicBlock *, 8> ContinueSrcBBs(pred_begin(pLatch), pred_end(pLatch));
+  DXASSERT_NOMSG(!ContinueSrcBBs.empty());
+
+  // Give each incoming edge its own continue helper.
+  for (BasicBlock *pSrcBB : ContinueSrcBBs) {
+    BasicBlock *pContinue = SplitEdge(pSrcBB, pLatch, "dx.LoopContinue", pLoop, nullptr);
+    DXASSERT_LOCALVAR_NOMSG(pContinue, pContinue->getTerminator()->getNumSuccessors() == 1);
+    DXASSERT_NOMSG((++pred_begin(pContinue)) == pred_end(pContinue));
+  }
+}
+
+// Attaches branch-kind annotations to the blocks recorded in pLI:
+//   pLP              -> LoopBegin (loop has an exit block) or LoopNoEnd,
+//   pLE              -> LoopExit, and each of its predecessors -> LoopBreak,
+//   pLL predecessors -> LoopContinue,
+//   pLL              -> LoopBackEdge.
+// pLoop is not used by this function.
+void ScopeNestedCFG::AnnotateLoopBranches(Loop *pLoop, LoopItem *pLI) {
+  BasicBlock *pLoopExitBB = pLI->pLE;
+  const bool bHasLoopExit = (pLoopExitBB != nullptr);
+
+  // Annotate the loop-begin block; the kind records whether the loop has an exit.
+  AnnotateBranch(pLI->pLP, bHasLoopExit ? BranchKind::LoopBegin : BranchKind::LoopNoEnd);
+
+  if (bHasLoopExit) {
+    AnnotateBranch(pLoopExitBB, BranchKind::LoopExit);
+    DXASSERT_NOMSG(pLoopExitBB->getTerminator()->getNumSuccessors() == 1);
+
+    // Every predecessor of the loop exit is a loop break.
+    for (auto itBreak = pred_begin(pLoopExitBB), endBreak = pred_end(pLoopExitBB); itBreak != endBreak; ++itBreak) {
+      BasicBlock *pBreakBB = *itBreak;
+      DXASSERT_NOMSG(pBreakBB->getTerminator()->getNumSuccessors() == 1);
+      AnnotateBranch(pBreakBB, BranchKind::LoopBreak);
+    }
+  }
+
+  // Every predecessor of the latch is a loop continue; each must have a single
+  // successor and a single predecessor.
+  BasicBlock *pLatchBB = pLI->pLL;
+  for (auto itCont = pred_begin(pLatchBB), endCont = pred_end(pLatchBB); itCont != endCont; ++itCont) {
+    BasicBlock *pContinueBB = *itCont;
+    DXASSERT_NOMSG(pContinueBB->getTerminator()->getNumSuccessors() == 1);
+    DXASSERT_NOMSG((++pred_begin(pContinueBB)) == pred_end(pContinueBB));
+    AnnotateBranch(pContinueBB, BranchKind::LoopContinue);
+  }
+
+  // Finally, the latch itself carries the backedge.
+  AnnotateBranch(pLatchBB, BranchKind::LoopBackEdge);
+}
+
+
+//-----------------------------------------------------------------------------
+// BasicBlock topological order for acyclic region.
+//-----------------------------------------------------------------------------
+// Appends pBB to the order. The new block's id is its position in m_BlockState;
+// the id is recorded in m_BlockIdMap and the block is marked as reachable from
+// itself in the supplied bit vector, which is then stored with the block.
+void ScopeNestedCFG::BlockTopologicalOrderAndReachability::AppendBlock(BasicBlock *pBB, 
+                                                                       unique_ptr<BitVector> ReachableBBs) {
+  const unsigned NewId = static_cast<unsigned>(m_BlockState.size());
+
+  // A block may be appended only once.
+  const bool bInserted = m_BlockIdMap.insert( {pBB, NewId } ).second;
+  DXASSERT_LOCALVAR_NOMSG(bInserted, bInserted);
+
+  ReachableBBs->set(NewId);
+  BasicBlockState NewState(pBB, std::move(ReachableBBs));
+  m_BlockState.push_back(std::move(NewState));
+}
+
+// Returns the number of blocks appended so far, narrowed to unsigned.
+unsigned ScopeNestedCFG::BlockTopologicalOrderAndReachability::GetNumBlocks() const {
+  const size_t NumBlocks = m_BlockState.size();
+  DXASSERT_NOMSG(NumBlocks < UINT32_MAX);
+  return static_cast<unsigned>(NumBlocks);
+}
+
+// Returns the block that was appended with the given id.
+// Id must be a valid id, i.e. smaller than GetNumBlocks(); in particular the
+// UINT32_MAX "not found" value produced by GetBlockId is not accepted.
+BasicBlock *ScopeNestedCFG::BlockTopologicalOrderAndReachability::GetBlock(unsigned Id) const {
+  // Catch out-of-range ids in checked builds instead of indexing past the end.
+  DXASSERT_NOMSG(Id < m_BlockState.size());
+  return m_BlockState[Id].pBB;
+}
+
+// Returns the id assigned to pBB by AppendBlock, or UINT32_MAX when pBB has
+// not been appended.
+unsigned ScopeNestedCFG::BlockTopologicalOrderAndReachability::GetBlockId(BasicBlock *pBB) const { 
+  const auto itId = m_BlockIdMap.find(pBB);
+  return (itId == m_BlockIdMap.cend()) ? UINT32_MAX : itId->second;
+}
+
+// Convenience overload: looks up the id of pBB and returns the reachability
+// bit vector stored for that id.
+BitVector *ScopeNestedCFG::BlockTopologicalOrderAndReachability::GetReachableBBs(BasicBlock *pBB) const {
+  const unsigned Id = GetBlockId(pBB);
+  return GetReachableBBs(Id);
+}
+
+// Returns the reachability bit vector stored for the block with the given id.
+// The pointer refers to storage owned by this object. Id must be smaller than
+// GetNumBlocks(); the UINT32_MAX "not found" value is not accepted.
+BitVector *ScopeNestedCFG::BlockTopologicalOrderAndReachability::GetReachableBBs(unsigned Id) const {
+  // Catch out-of-range ids in checked builds instead of indexing past the end.
+  DXASSERT_NOMSG(Id < m_BlockState.size());
+  return m_BlockState[Id].ReachableBBs.get();
+}
+
+// Prints one line per block: "<id>: <name>,  ReachableBBs = { <ids> }",
+// where <ids> is the comma-separated list of set bits in the block's
+// reachability vector.
+void ScopeNestedCFG::BlockTopologicalOrderAndReachability::dump(raw_ostream &OS) const {
+  const unsigned NumBlocks = GetNumBlocks();
+  for (unsigned Id = 0; Id < NumBlocks; ++Id) {
+    BasicBlock *pBB = GetBlock(Id);
+    DXASSERT_NOMSG(GetBlockId(pBB) == Id);
+    OS << Id << ": " << pBB->getName() << ",  ReachableBBs = { ";
+
+    const BitVector *pReachableBBs = GetReachableBBs(Id);
+    // Empty before the first printed id, ", " afterwards.
+    const char *pSeparator = "";
+    for (unsigned OtherId = 0; OtherId < NumBlocks; ++OtherId) {
+      if (!pReachableBBs->test(OtherId))
+        continue;
+      OS << pSeparator << OtherId;
+      pSeparator = ", ";
+    }
+    OS << " }\n";
+  }
+}
+
+// Entry point of the recursive computation: fills BTO with the blocks found
+// from pEntry, starting with an empty set of visit marks.
+void ScopeNestedCFG::ComputeBlockTopologicalOrderAndReachability(BasicBlock *pEntry, 
+                                                                 BlockTopologicalOrderAndReachability &BTO) {
+  // Per-block visit state, owned by this call and shared by the recursion.
+  unordered_map<BasicBlock *, unsigned> VisitMarks;
+  ComputeBlockTopologicalOrderAndReachabilityRec(pEntry, BTO, VisitMarks);
+
+#if SNCFG_DBG
+  dbgs() << "\nBB topological order and reachable BBs rooted at " << pEntry->getName() << ":\n";
+  BTO.dump(dbgs());
+#endif
+}
+
+// Recursive depth-first step of the block ordering.
+//
+// A node is appended to BTO only after all of its (effective) successors have
+// been appended, so every successor receives a smaller id than the node.
+// The node's reachability set is the union of its successors' sets;
+// AppendBlock adds the node itself.
+//
+// Marks holds the visit state of each block:
+//   absent - not visited yet,
+//   1      - "early watermark": the node is on the current recursion path,
+//   2      - "late watermark": the node has been appended to BTO.
+//
+// Meeting a node that still carries the early watermark means the region is
+// cyclic. Such a node has no id yet, so the caller could not look up its
+// reachability set; the conversion is failed with DXC_E_SCOPE_NESTED_FAILED
+// (in addition to the assert) rather than continuing with an invalid id.
+void ScopeNestedCFG::ComputeBlockTopologicalOrderAndReachabilityRec(BasicBlock *pNode,
+                                                                    BlockTopologicalOrderAndReachability &BTO,
+                                                                    unordered_map<BasicBlock *, unsigned> &Marks) {
+  const auto itMarkBB = Marks.find(pNode);
+  if (itMarkBB != Marks.end()) {
+    const bool bFinished = (itMarkBB->second == 2);
+    DXASSERT(bFinished, "acyclic component has a cycle");
+    if (!bFinished) {
+      IFT(DXC_E_SCOPE_NESTED_FAILED);
+    }
+    return;
+  }
+
+  unsigned NumBBs = (unsigned)pNode->getParent()->getBasicBlockList().size();
+
+  // A region terminator has no successors to follow. Otherwise ask which node's
+  // successors stand in for this node's; nullptr means a loop with no exit.
+  BasicBlock *pNodeToFollowSuccessors =
+      IsAcyclicRegionTerminator(pNode) ? nullptr : GetEffectiveNodeToFollowSuccessor(pNode);
+
+  // Nothing to follow: the node reaches only itself.
+  if (pNodeToFollowSuccessors == nullptr) {
+    Marks[pNode] = 2; // late watermark
+    BTO.AppendBlock(pNode, std::make_unique<BitVector>(NumBBs, false));
+    return;
+  }
+
+  Marks[pNode] = 1; // early watermark
+
+  auto ReachableBBs = std::make_unique<BitVector>(NumBBs, false);
+  for (auto itSucc = succ_begin(pNodeToFollowSuccessors), endSucc = succ_end(pNodeToFollowSuccessors); itSucc != endSucc; ++itSucc) {
+    BasicBlock *pSuccBB = *itSucc;
+
+    ComputeBlockTopologicalOrderAndReachabilityRec(pSuccBB, BTO, Marks);
+    // Union reachable BBs.
+    (*ReachableBBs) |= (*BTO.GetReachableBBs(pSuccBB));
+  }
+
+  Marks[pNode] = 2; // late watermark
+
+  BTO.AppendBlock(pNode, std::move(ReachableBBs));
+}
+
+
+//-----------------------------------------------------------------------------
+// Recovery of scope end points.
+//-----------------------------------------------------------------------------
+// Computes merge points for the region rooted at pEntry and merges them into
+// ScopeEndPoints (block -> scope-end block, nullptr when no merge point was
+// found). Whenever an existing entry is replaced by a different end block, the
+// previous value is saved in DeltaScopeEndPoints, which must be empty on entry.
+// With bRecomputeSwitchScope set, the computation is limited to the blocks
+// reachable from pEntry and uses pEntry's currently recorded end point as exit.
+void ScopeNestedCFG::DetermineScopeEndPoints(BasicBlock *pEntry,
+                                             bool bRecomputeSwitchScope,
+                                             const BlockTopologicalOrderAndReachability &BTO,
+                                             const SwitchBreaksMap &SwitchBreaks,
+                                             ScopeEndPointsMap &ScopeEndPoints,
+                                             ScopeEndPointsMap &DeltaScopeEndPoints) {
+  DXASSERT_NOMSG(DeltaScopeEndPoints.empty());
+
+  // Step 1: compute the merge point information of every block in the region.
+  MergePointsMap MergePoints;
+  BasicBlock *pKnownExit = nullptr;
+  BitVector *pRegionBBs = nullptr;
+  if (bRecomputeSwitchScope) {
+    auto itKnownExit = ScopeEndPoints.find(pEntry);
+    if (itKnownExit != ScopeEndPoints.end())
+      pKnownExit = itKnownExit->second;
+    pRegionBBs = BTO.GetReachableBBs(pEntry);
+  }
+  DetermineReachableMergePoints(pEntry, pKnownExit, bRecomputeSwitchScope, pRegionBBs,
+                                BTO, SwitchBreaks, ScopeEndPoints, MergePoints);
+
+  // Step 2: fold the results into the scope end points map.
+  for (auto &Entry : MergePoints) {
+    BasicBlock *pBB = Entry.first;
+    const auto MergePointId = Entry.second->MP;
+    BasicBlock *pNewEndBB = (MergePointId != UINT32_MAX) ? BTO.GetBlock(MergePointId) : nullptr;
+
+    auto itPrevEnd = ScopeEndPoints.find(pBB);
+    if (itPrevEnd == ScopeEndPoints.end()) {
+      // First time this block gets an end point.
+      ScopeEndPoints[pBB] = pNewEndBB;
+      continue;
+    }
+
+    if (itPrevEnd->second != pNewEndBB) {
+      // The end point changed: remember the previous one, then overwrite it.
+      DeltaScopeEndPoints[pBB] = itPrevEnd->second;
+      itPrevEnd->second = pNewEndBB;
+    }
+  }
+
+#if SNCFG_DBG
+  dbgs() << "\nScope ends:\n";
+  for (auto itScope = ScopeEndPoints.begin(); itScope != ScopeEndPoints.end(); ++itScope) {
+    BasicBlock *pBegin = itScope->first;
+    BasicBlock *pEnd = itScope->second;
+    dbgs() << pBegin->getName() << ", ID=" << BTO.GetBlockId(pBegin) << " -> ";
+    if (pEnd) {
+      dbgs() << pEnd->getName() << ", ID=" << BTO.GetBlockId(pEnd) << "\n";
+    } else {
+      dbgs() << "unreachable\n";
+    }
+  }
+#endif
+}
+
+void ScopeNestedCFG::DetermineReachableMergePoints(BasicBlock *pEntry,
+                                                   BasicBlock *pExit,
+                                                   bool bRecomputeSwitchScope,
+                                                   const BitVector *pReachableBBs,
+                                                   const BlockTopologicalOrderAndReachability &BTO,
+                                                   const SwitchBreaksMap &SwitchBreaks,
+                                                   const ScopeEndPointsMap &OldScopeEndPoints,
+                                                   MergePointsMap &MergePoints) { /*
+   * Fills MergePoints (which must be empty on entry) with one MergePointInfo
+   * per visited block.
+   *   - MP is the BTO id chosen as the block's merge point, or UINT32_MAX when
+   *     none was determined.
+   *   - CandidateSet is the set of merge-point ids handed on to the blocks
+   *     visited later.
+   * Blocks are visited in increasing BTO id. Without bRecomputeSwitchScope all
+   * blocks of BTO are visited. With it, only ids in [id(pExit), id(pEntry)]
+   * whose bit is set in pReachableBBs are visited; pReachableBBs is only
+   * dereferenced in that mode.
+   * SwitchBreaks supplies, per block, a successor to leave out of the analysis.
+   * OldScopeEndPoints supplies previously recorded end points that are used as
+   * a fallback for switch blocks.
+   */
+  DXASSERT_NOMSG(MergePoints.empty());
+  unsigned MinBBIdx = 0;
+  unsigned MaxBBIdx = BTO.GetNumBlocks() - 1;
+  if (bRecomputeSwitchScope) {
+    MinBBIdx = BTO.GetBlockId(pExit);  // lower bound of the id range
+    MaxBBIdx = BTO.GetBlockId(pEntry); // upper bound of the id range
+  }
+
+  for (unsigned iBB = MinBBIdx; iBB <= MaxBBIdx; iBB++) {
+    if (bRecomputeSwitchScope && !pReachableBBs->test(iBB)) {
+      // The block does not belong to the current switch region.
+      continue;
+    }
+
+    BasicBlock *pBB = BTO.GetBlock(iBB);
+    MergePoints[pBB] = unique_ptr<MergePointInfo>(new MergePointInfo);
+    MergePointInfo &MPI = *MergePoints[pBB];
+    BasicBlock *pNodeToFollowSuccessors = GetEffectiveNodeToFollowSuccessor(pBB);
+
+    MPI.MP = UINT32_MAX; // UINT32_MAX stands for "no merge point determined"
+
+    // Successors are analyzed only for blocks that are not region terminators,
+    // have a node to follow, are not loop backedges and are not the exit of the
+    // scope being recomputed.
+    if (!IsAcyclicRegionTerminator(pBB) && 
+        pNodeToFollowSuccessors != nullptr && 
+        !IsLoopBackedge(pNodeToFollowSuccessors) &&
+        !(bRecomputeSwitchScope && pBB == pExit)) {
+      // a. Collect unique successors, excluding switch break.
+      const auto itSwitchBreaks = SwitchBreaks.find(pBB);
+      const BasicBlock *pSwitchBreak = (itSwitchBreaks == SwitchBreaks.cend()) ? nullptr : itSwitchBreaks->second;
+      vector<BasicBlock *> Successors;
+      CollectUniqueSuccessors(pNodeToFollowSuccessors, pSwitchBreak, Successors);
+
+      /* b. Partition successors.
+       * Each partition groups successors whose candidate sets overlap:
+       *   MPIndices - intersection of the candidate sets of its members,
+       *   Blocks    - the member successors.
+       */
+      struct Partition {
+        set<unsigned> MPIndices;
+        unordered_set<BasicBlock *> Blocks;
+      };
+      vector<Partition> Partitions;
+
+      for (auto pSuccBB : Successors) {
+        if (MergePoints.find(pSuccBB) == MergePoints.end()) {
+          // Only expected in recompute mode, for a successor whose id lies below
+          // the visited range; it gets an entry with default-constructed info.
+          DXASSERT_NOMSG(bRecomputeSwitchScope && BTO.GetBlockId(pSuccBB) < MinBBIdx);
+          MergePoints[pSuccBB] = std::make_unique<MergePointInfo>();
+        }
+        MergePointInfo &SuccMPI = *MergePoints[pSuccBB];
+
+        /* Find a partition for this successor.
+         * The first partition whose MPIndices intersects the successor's
+         * candidate set takes it, and that partition's MPIndices is narrowed to
+         * the intersection.
+         */
+        bool bFound = false;
+        for (auto &P : Partitions) {
+          set<unsigned> Intersection;
+          std::set_intersection(P.MPIndices.begin(), P.MPIndices.end(),
+                                SuccMPI.CandidateSet.begin(), SuccMPI.CandidateSet.end(),
+                                std::inserter(Intersection, Intersection.end()));
+          if (!Intersection.empty()) {
+            swap(P.MPIndices, Intersection);
+            P.Blocks.insert(pSuccBB);
+            bFound = true;
+            break;
+          }
+        }
+
+        if (!bFound) {
+          // Create a new partition.
+          Partition P;
+          P.MPIndices = SuccMPI.CandidateSet;
+          P.Blocks.insert(pSuccBB);
+          Partitions.emplace_back(P);
+        }
+      }
+
+      // c. Analyze successors.
+      if (Partitions.size() == 1) {
+        // All successors share candidates: take the largest shared id.
+        auto &Intersection = Partitions[0].MPIndices;
+        if (!Intersection.empty()) {
+          MPI.MP = *Intersection.crbegin(); // last element of the ordered set, i.e. the largest id
+          swap(MPI.CandidateSet, Intersection); // discard partition set, as we do not need it anymore.
+        } else {
+          MPI.MP = UINT32_MAX;
+        }
+      } else {
+        // We do not [yet] know the merge point.
+        MPI.MP = UINT32_MAX;
+
+        // For switch, select the largest partition with at least two elements.
+        if (SwitchInst *pSI = dyn_cast<SwitchInst>(pNodeToFollowSuccessors->getTerminator())) {
+          size_t MaxPartSize = 0;
+          size_t MaxPartIdx = 0;
+          for (size_t i = 0; i < Partitions.size(); i++) {
+            auto s = Partitions[i].Blocks.size();
+            if (s > MaxPartSize) { // strict comparison: on ties the earliest partition wins
+              MaxPartSize = s;
+              MaxPartIdx = i;
+            }
+          }
+
+          if (MaxPartSize >= 2) {
+            MPI.MP = *Partitions[MaxPartIdx].MPIndices.crbegin(); // largest id in that partition's set
+            swap(MPI.CandidateSet, Partitions[MaxPartIdx].MPIndices); // discard partition set, as we do not need it anymore.
+          }
+
+          //TODO: during final testing consider to remove.
+          // Fallback: reuse the end point previously recorded for this block, if any.
+          if (MPI.MP == UINT32_MAX) {
+            auto itOldMP = OldScopeEndPoints.find(pBB);
+            if (itOldMP != OldScopeEndPoints.end()) {
+              MPI.MP = BTO.GetBlockId(itOldMP->second);
+              MPI.CandidateSet.insert(MPI.MP);
+            }
+          }
+        }
+
+        if (MPI.MP == UINT32_MAX) {
+          // Compute MP union for upcoming propagation upwards.
+          set<unsigned> Union;
+          for (auto pSuccBB : Successors) {
+            MergePointInfo &SuccMPI = *MergePoints[pSuccBB];
+
+            set<unsigned> TmpSet;
+            std::set_union(Union.begin(), Union.end(),
+                           SuccMPI.CandidateSet.begin(), SuccMPI.CandidateSet.end(),
+                           std::inserter(TmpSet, TmpSet.end()));
+            swap(Union, TmpSet);
+          }
+
+          swap(MPI.CandidateSet, Union); // replaces whatever CandidateSet held so far
+        }
+      }
+    }
+
+    // Add a merge point to the candidate set: a block that is itself a merge point contributes its own id.
+    if (IsMergePoint(pBB)) {
+      DXASSERT_NOMSG(m_LoopMap.find(pBB) == m_LoopMap.cend());
+      DXASSERT_NOMSG(m_LE2LBMap.find(pBB) == m_LE2LBMap.cend());
+      MPI.CandidateSet.insert(iBB);
+    }
+  }
+
+  //TODO: during final testing consider to remove.
+  // Compensate switch end point: when pEntry ends in a switch and has a
+  // previously recorded end point, that end point overrides the computed one.
+  if (SwitchInst *pSI = dyn_cast<SwitchInst>(pEntry->getTerminator())) {
+    auto itOldEP = OldScopeEndPoints.find(pEntry);
+    auto itMP = MergePoints.find(pEntry);
+    if (itOldEP != OldScopeEndPoints.end()) {
+      unsigned OldMP = BTO.GetBlockId(itOldEP->second);
+      MergePointInfo &MPI = *itMP->second;
+      if (MPI.MP != OldMP) {
+        MPI.MP = OldMP;
+        MPI.CandidateSet.clear();
+        if (MPI.MP != UINT32_MAX) { // no candidate is inserted when the old end point has no id
+          MPI.CandidateSet.insert(MPI.MP);
+        }
+      }
+    }
+  }
+
+#if SNCFG_DBG
+  dbgs() << "\nScope ends:\n";
+  for (auto it = MergePoints.begin(); it != MergePoints.end(); ++it) {
+    BasicBlock *pBB = it->first;
+    MergePointInfo &MPI = *it->second;
+    dbgs() << it->first->getName() << ":  ID = " << BTO.GetBlockId(pBB) << ", MP = " << (int)MPI.MP << "\n";
+    dbgs() << "  CandidateSet = "; DumpIntSet(dbgs(), MPI.CandidateSet); dbgs() << "\n";
+  }
+#endif
+}
+
+// Collects the switch breaks of the switch that terminates pSwitchBegin.
+// A switch break is a predecessor of the switch's recorded scope end that has
+// an id in BTO and is reachable from pSwitchBegin; each one is stored in
+// SwitchBreaks as (predecessor -> scope end). SwitchBreaks must be empty on
+// entry and stays empty when the switch has no recorded (non-null) scope end.
+void ScopeNestedCFG::DetermineSwitchBreaks(BasicBlock *pSwitchBegin,
+                                           const ScopeEndPointsMap &ScopeEndPoints,
+                                           const BlockTopologicalOrderAndReachability &BTO,
+                                           SwitchBreaksMap &SwitchBreaks) {
+  DXASSERT_NOMSG(SwitchBreaks.empty());
+  TerminatorInst *pTI = pSwitchBegin->getTerminator();
+  DXASSERT_LOCALVAR_NOMSG(pTI, dyn_cast<SwitchInst>(pTI) != nullptr);
+
+  // Nothing to do unless the switch has a known end of scope.
+  const auto itScopeEnd = ScopeEndPoints.find(pSwitchBegin);
+  if (itScopeEnd == ScopeEndPoints.end() || itScopeEnd->second == nullptr)
+    return;
+
+  BasicBlock *pSwitchEnd = itScopeEnd->second;
+  const BitVector *pReachableFromSwitchBegin = BTO.GetReachableBBs(pSwitchBegin);
+
+  for (auto itPred = pred_begin(pSwitchEnd), endPred = pred_end(pSwitchEnd); itPred != endPred; ++itPred) {
+    BasicBlock *pPredBB = *itPred;
+    const unsigned PredId = BTO.GetBlockId(pPredBB);
+
+    // A predecessor without an id is an alternative entry into the acyclic
+    // component; otherwise it is a switch break if the switch can reach it.
+    if (PredId != UINT32_MAX && pReachableFromSwitchBegin->test(PredId)) {
+      SwitchBreaks.insert( {pPredBB, pSwitchEnd} );
+    }
+  }
+
+#if SNCFG_DBG
+  if (!SwitchBreaks.empty()) {
+    dbgs() << "\nSwitch breaks:\n";
+    for (auto itBreak = SwitchBreaks.begin(); itBreak != SwitchBreaks.end(); ++itBreak) {
+      BasicBlock *pSrcBB = itBreak->first;
+      BasicBlock *pDstBB = itBreak->second;
+      dbgs() << pSrcBB->getName() << " -> " << pDstBB->getName() << "\n";
+    }
+  }
+#endif
+}
+
+
+//-----------------------------------------------------------------------------
+// Transformation of acyclic region.
+//-----------------------------------------------------------------------------
+void ScopeNestedCFG::TransformAcyclicRegion(BasicBlock *pEntry) { /*
+   * Restructures the acyclic region rooted at pEntry.
+   *
+   * The region is walked with an explicit stack (m_ScopeStack). Each popped
+   * item describes one block, "scope begin", together with the clone that
+   * stands for it. The block's terminator is classified as return, loop
+   * break/continue, loop backedge, loop begin, if, switch or fallthrough.
+   * If and switch items are pushed back once per successor (SuccIdx counts
+   * the successors handled so far) and are finalized when all successors are
+   * done.
+   *
+   * Successor blocks are obtained from CloneNode and nested loops from
+   * CloneLoop. During the walk, new control flow is only recorded in Edges via
+   * AddEdge; terminators are rewritten in the "Fixup edges" loop at the end.
+   */
+  unordered_map<BasicBlock *, vector<BasicBlock *> > BlockClones;   // handed to CloneNode/CloneLoop
+  unordered_map<BasicBlock *, vector<BasicBlock *> > Edges;         // block -> new successor list, filled by AddEdge
+  ValueToValueMapTy RegionValueRemap;                                // handed to CloneNode/CloneLoop
+
+  BlockTopologicalOrderAndReachability BTO;
+  ComputeBlockTopologicalOrderAndReachability(pEntry, BTO);
+
+  // Set up entry scope. The entry block stands for itself (it is not cloned).
+  ScopeStackItem &EntryScope = PushScope(pEntry);
+  DXASSERT_NOMSG(EntryScope.pScopeBeginBB == pEntry);
+  EntryScope.pClonedScopeBeginBB = pEntry;
+  EntryScope.pScopeEndBB = nullptr;
+  EntryScope.pClonedScopeEndBB = nullptr;
+  DXASSERT_NOMSG(EntryScope.SuccIdx == 0);
+  EntryScope.ScopeEndPoints = std::make_shared<ScopeEndPointsMap>();
+  EntryScope.DeltaScopeEndPoints = std::make_shared<ScopeEndPointsMap>();
+  EntryScope.SwitchBreaks = std::make_shared<SwitchBreaksMap>();
+  DetermineScopeEndPoints(pEntry, false, BTO, SwitchBreaksMap{}, 
+                          *EntryScope.ScopeEndPoints.get(), *EntryScope.DeltaScopeEndPoints.get());
+
+  while (!m_ScopeStack.empty()) {
+    ScopeStackItem Scope = *GetScope(); // work on a copy of the top item
+    PopScope();                         // from here on GetScope() returns the item below it
+    // Assume: (1) current node is already cloned (if needed),
+    //         (2) current node is already properly connected to its predecessor
+
+    TerminatorInst *pScopeBeginTI = Scope.pScopeBeginBB->getTerminator();
+    BranchKind BeginScopeBranchKind = GetBranchAnnotation(Scope.pScopeBeginBB);
+
+    //
+    // I. Process the node.
+    //
+
+    // 1. The node is a scope terminator.
+
+    // 1a. Return.
+    if (dyn_cast<ReturnInst>(pScopeBeginTI)) {
+      continue;
+    }
+
+    DXASSERT_NOMSG(pScopeBeginTI->getNumSuccessors() > 0);
+    // 1b. Break and continue.
+    switch (BeginScopeBranchKind) {
+    case BranchKind::LoopBreak: {
+      // Connect to loop exit. The loop is looked up in m_LoopMap under pEntry.
+      TerminatorInst *pClonedScopeBeginTI = Scope.pClonedScopeBeginBB->getTerminator();
+      DXASSERT_NOMSG(pClonedScopeBeginTI->getNumSuccessors() == 1);
+      DXASSERT_NOMSG(m_LoopMap.find(pEntry) != m_LoopMap.end());
+      LoopItem &LI = m_LoopMap[pEntry];
+      AddEdge(Scope.pClonedScopeBeginBB, 0, LI.pLE, Edges);
+      continue;
+    }
+
+    case BranchKind::LoopContinue: {
+      // Connect to loop latch. The loop is looked up in m_LoopMap under pEntry.
+      TerminatorInst *pClonedScopeBeginTI = Scope.pClonedScopeBeginBB->getTerminator();
+      DXASSERT_NOMSG(pClonedScopeBeginTI->getNumSuccessors() == 1);
+      DXASSERT_NOMSG(m_LoopMap.find(pEntry) != m_LoopMap.end());
+      LoopItem &LI = m_LoopMap[pEntry];
+      AddEdge(Scope.pClonedScopeBeginBB, 0, LI.pLL, Edges);
+      continue;
+    }
+
+    default: ;  // Process further.
+    }
+
+    // 1c. Loop latch node.
+    if (IsLoopBackedge(Scope.pScopeBeginBB)) {
+      continue;
+    }
+
+    // 2. Clone a nested loop and proceed after the loop.
+    if (BeginScopeBranchKind == BranchKind::LoopBegin || BeginScopeBranchKind == BranchKind::LoopNoEnd) {
+      // The node is a loop preheader, which has been already cloned, if necessary.
+
+      // Original loop.
+      BasicBlock *pPreheader = Scope.pScopeBeginBB;
+      DXASSERT_NOMSG(pPreheader->getTerminator()->getNumSuccessors() == 1);
+      BasicBlock *pHeader = pPreheader->getTerminator()->getSuccessor(0);
+      LoopItem &Loop = m_LoopMap[pHeader];
+
+      // Clone loop.
+      BasicBlock *pClonedHeader = CloneLoop(pHeader, Scope.pClonedScopeBeginBB, BlockClones, Edges, RegionValueRemap);
+
+      // Connect cloned preheader to cloned loop.
+      AddEdge(Scope.pClonedScopeBeginBB, 0, pClonedHeader, Edges);
+
+      // Push loop-end node onto the stack.
+      LoopItem &ClonedLoop = m_LoopMap[pClonedHeader];
+
+      if (Loop.pLE != nullptr) {
+        // Loop with loop exit node: processing resumes at the loop exit, which
+        // is represented by the cloned loop's exit block.
+        DXASSERT_NOMSG(Loop.pLE->getTerminator()->getNumSuccessors() == 1);
+        ScopeStackItem &AfterEndLoopScope = PushScope(Loop.pLE);
+        AfterEndLoopScope.pClonedScopeBeginBB = ClonedLoop.pLE;
+        AfterEndLoopScope.ScopeEndPoints = Scope.ScopeEndPoints;
+        AfterEndLoopScope.DeltaScopeEndPoints = Scope.DeltaScopeEndPoints;
+        AfterEndLoopScope.SwitchBreaks = Scope.SwitchBreaks;
+      } else {
+        // Loop without loop exit node.
+        DXASSERT_NOMSG(ClonedLoop.pLE == nullptr);
+      }
+
+      continue;
+    }
+
+    // 3. Classify scope.
+    bool bSwitchScope = IsSwitch(pScopeBeginTI);
+    bool bIfScope = IsIf(pScopeBeginTI);
+
+    // 4. Open scope. Done once per if/switch, on its first visit (SuccIdx == 0).
+    if (Scope.SuccIdx == 0 && (bIfScope || bSwitchScope)) {
+      if (bSwitchScope) {
+        // Detect switch breaks for switch scope.
+        SwitchBreaksMap SwitchBreaks;
+        DetermineSwitchBreaks(Scope.pScopeBeginBB, *Scope.ScopeEndPoints.get(), BTO, SwitchBreaks);
+
+        if (!SwitchBreaks.empty()) {
+          // After switch breaks are known, recompute scope end points more precisely.
+          // The overwritten end points are collected in a fresh delta map and
+          // written back when this scope is finalized (step 6).
+          Scope.DeltaScopeEndPoints = std::make_shared<ScopeEndPointsMap>();
+          Scope.SwitchBreaks = std::make_shared<SwitchBreaksMap>(SwitchBreaks);
+          DetermineScopeEndPoints(Scope.pScopeBeginBB, true, BTO, 
+                                  *Scope.SwitchBreaks.get(),
+                                  *Scope.ScopeEndPoints.get(),
+                                  *Scope.DeltaScopeEndPoints.get());
+        }
+      }
+
+      if (bIfScope) {
+        // Refine if-scope end point. Applies only when no end point is recorded (nullptr).
+        auto itEndIfScope = Scope.ScopeEndPoints->find(Scope.pScopeBeginBB);
+        DXASSERT_NOMSG(itEndIfScope != Scope.ScopeEndPoints->cend());
+        if (itEndIfScope->second == nullptr) {
+          ScopeStackItem *pParentScope = GetScope();
+          BasicBlock *pCandidateEndScopeBB = nullptr;
+          if (pParentScope != nullptr && pParentScope->pScopeEndBB != nullptr) {
+            // Determine which branch has parent's end scope node.
+            unsigned ParentScopeEndId = BTO.GetBlockId(pParentScope->pScopeEndBB);
+            for (unsigned i = 0; i < pScopeBeginTI->getNumSuccessors(); i++) {
+              BasicBlock *pSucc = pScopeBeginTI->getSuccessor(i);
+              // Skip a switch break.
+              auto itSwBreak = Scope.SwitchBreaks->find(pSucc);
+              if (itSwBreak != Scope.SwitchBreaks->end()) {
+                continue;
+              }
+
+              BitVector *pReachableBBs = BTO.GetReachableBBs(pSucc);
+              if (pReachableBBs->test(ParentScopeEndId)) {
+                if (!pCandidateEndScopeBB) {
+                  // Case1: one of IF's branches terminates only by region terminators.
+                  pCandidateEndScopeBB = pSucc;
+                } else {
+                  // Case2: both branches terminate only by region terminators (e.g., SWITCH breaks).
+                  pCandidateEndScopeBB = nullptr;
+                }
+              }
+            }
+
+            if (pCandidateEndScopeBB) {
+              // Temporary adjustment; undone in step 6 via bRestoreIfScopeEndPoint.
+              Scope.bRestoreIfScopeEndPoint = true;
+              itEndIfScope->second = pCandidateEndScopeBB;
+#if SNCFG_DBG
+              BasicBlock *pBegin = Scope.pScopeBeginBB;
+              BasicBlock *pEnd = pCandidateEndScopeBB;
+              dbgs() << "\nAdjusted IF's end: ";
+              dbgs() << pBegin->getName() << ", ID=" << BTO.GetBlockId(pBegin) << " -> ";
+              dbgs() << pEnd->getName() << ", ID=" << BTO.GetBlockId(pEnd) << "\n";
+#endif
+            }
+          }
+        }
+      }
+
+      // Determine scope end and set up helper nodes, if necessary.
+      BranchKind ScopeBeginBranchKind = BranchKind::Invalid;
+      BranchKind ScopeEndBranchKind = BranchKind::Invalid;
+      auto itEndScope = Scope.ScopeEndPoints->find(Scope.pScopeBeginBB);
+      if (itEndScope != Scope.ScopeEndPoints->cend() && itEndScope->second != nullptr) {
+        Scope.pScopeEndBB = itEndScope->second;
+        Scope.pClonedScopeEndBB = BasicBlock::Create(*m_pCtx, bIfScope ? "dx.EndIfScope" : "dx.EndSwitchScope", m_pFunc, Scope.pScopeEndBB);
+        BranchInst::Create(Scope.pClonedScopeEndBB, Scope.pClonedScopeEndBB); // placeholder branch to itself; the real target is recorded with AddEdge in step 6
+        ScopeBeginBranchKind = bIfScope ? BranchKind::IfBegin : BranchKind::SwitchBegin;
+        ScopeEndBranchKind = bIfScope ? BranchKind::IfEnd : BranchKind::SwitchEnd;
+      } else {
+        Scope.pScopeEndBB = nullptr;
+        Scope.pClonedScopeEndBB = nullptr;
+        ScopeBeginBranchKind = bIfScope ? BranchKind::IfNoEnd : BranchKind::SwitchNoEnd;
+      }
+
+      // Annotate scope-begin and scope-end branches.
+      DXASSERT_NOMSG(ScopeBeginBranchKind != BranchKind::Invalid);
+      AnnotateBranch(Scope.pClonedScopeBeginBB, ScopeBeginBranchKind);
+      if (Scope.pClonedScopeEndBB != nullptr) {
+        DXASSERT_NOMSG(ScopeEndBranchKind != BranchKind::Invalid);
+        AnnotateBranch(Scope.pClonedScopeEndBB, ScopeEndBranchKind);
+      }
+    }
+
+    // 5. Push unfinished if and switch scopes onto the stack.
+    if ((bIfScope || bSwitchScope) && 
+        Scope.SuccIdx < pScopeBeginTI->getNumSuccessors()) {
+      ScopeStackItem &UnfinishedScope = RePushScope(Scope);
+
+      // Advance successor. Only the pushed copy advances; the local Scope still
+      // names the successor handled in this iteration.
+      UnfinishedScope.SuccIdx++;
+    }
+
+    // 6. Finalize scope. Reached when every successor has been handled.
+    if ((bIfScope || bSwitchScope) && (Scope.SuccIdx == pScopeBeginTI->getNumSuccessors())) {
+      if (Scope.pScopeEndBB != nullptr) {
+        bool bEndScopeSharedWithParent = false;
+
+        ScopeStackItem *pParentScope = GetScope();
+        if (pParentScope != nullptr) {
+          if (Scope.pScopeEndBB == pParentScope->pScopeEndBB) {
+            // Same end block as the parent: chain this scope's end helper to the parent's.
+            bEndScopeSharedWithParent = true;
+            if (Scope.pClonedScopeEndBB != nullptr) {
+              AddEdge(Scope.pClonedScopeEndBB, 0, pParentScope->pClonedScopeEndBB, Edges);
+            }
+          }
+        }
+
+        if (!bEndScopeSharedWithParent) {
+          // Clone original end-of-scope BB.
+          ScopeStackItem &AfterEndOfScopeScope = PushScope(Scope.pScopeEndBB);
+          AfterEndOfScopeScope.pClonedScopeBeginBB = CloneNode(Scope.pScopeEndBB, BlockClones, RegionValueRemap);
+          AfterEndOfScopeScope.ScopeEndPoints = Scope.ScopeEndPoints;
+          AfterEndOfScopeScope.DeltaScopeEndPoints = Scope.DeltaScopeEndPoints;
+          AfterEndOfScopeScope.SwitchBreaks = Scope.SwitchBreaks;
+          AddEdge(Scope.pClonedScopeEndBB, 0, AfterEndOfScopeScope.pClonedScopeBeginBB, Edges);
+        }
+      }
+
+      // Restore original (parent scope) ScopeEndPoints.
+      if (bSwitchScope) {
+        for (auto &it : *Scope.DeltaScopeEndPoints) {
+          BasicBlock *pBB = it.first;
+          BasicBlock *pOldMP = it.second;
+          (*Scope.ScopeEndPoints)[pBB] = pOldMP;
+        }
+      }
+      if (Scope.bRestoreIfScopeEndPoint) {
+        DXASSERT_NOMSG(bIfScope);
+        auto itEndIfScope = Scope.ScopeEndPoints->find(Scope.pScopeBeginBB);
+        DXASSERT_NOMSG(itEndIfScope != Scope.ScopeEndPoints->cend());
+        DXASSERT_NOMSG(itEndIfScope->second != nullptr);
+        itEndIfScope->second = nullptr;
+      }
+
+      continue;
+    }
+
+    //
+    // II. Process successors.
+    //
+    BasicBlock *pSuccBB = pScopeBeginTI->getSuccessor(Scope.SuccIdx);
+
+    // 7. Already processed successor: same target as the previously handled
+    //    successor, so the clone made for that one is reused.
+    if (bIfScope || bSwitchScope) {
+      if (pSuccBB == Scope.pPrevSuccBB) {
+        DXASSERT_NOMSG(Scope.pClonedPrevSuccBB != nullptr);
+        AddEdge(Scope.pClonedScopeBeginBB, Scope.SuccIdx, Scope.pClonedPrevSuccBB, Edges);
+        continue;
+      }
+    }
+
+    // 8. Successor meets end-of-scope.
+    bool bEndOfScope = false;
+    if (pSuccBB == Scope.pScopeEndBB) {
+      // 8a. Successor is end of current scope.
+      bEndOfScope = true;
+      AddEdge(Scope.pClonedScopeBeginBB, Scope.SuccIdx, Scope.pClonedScopeEndBB, Edges);
+    } else {
+      // 8b. Successor is end of parent scope.
+      ScopeStackItem *pParentScope = GetScope();
+      if (pParentScope != nullptr) {
+        auto it = Scope.SwitchBreaks->find(Scope.pScopeBeginBB);
+        bool bSwitchBreak = it != Scope.SwitchBreaks->cend();
+        if (pSuccBB == pParentScope->pScopeEndBB) {
+          bEndOfScope = true;
+          if (!bSwitchBreak) {
+            AddEdge(Scope.pClonedScopeBeginBB, Scope.SuccIdx, pParentScope->pClonedScopeEndBB, Edges);
+          }
+        }
+        if (bSwitchBreak) {
+          if (pSuccBB == it->second) {
+            // Switch break: route the edge through a new "dx.SwitchBreak" helper
+            // to the end helper of the nearest switch scope on the stack.
+            bEndOfScope = true;
+            ScopeStackItem *pSwitchScope = FindParentScope(ScopeStackItem::Kind::Switch);
+            DXASSERT_NOMSG(pSuccBB == pSwitchScope->pScopeEndBB);
+            BasicBlock *pSwitchBreakHelper = BasicBlock::Create(*m_pCtx, "dx.SwitchBreak", m_pFunc, pSuccBB);
+            BranchInst::Create(pSwitchBreakHelper, pSwitchBreakHelper); // placeholder branch to itself; retargeted by the AddEdge below
+            AnnotateBranch(pSwitchBreakHelper, BranchKind::SwitchBreak);
+            AddEdge(Scope.pClonedScopeBeginBB, Scope.SuccIdx, pSwitchBreakHelper, Edges);
+            AddEdge(pSwitchBreakHelper, 0, pSwitchScope->pClonedScopeEndBB, Edges);
+          }
+        }
+      }
+    }
+
+    // 9. Clone successor & push its record onto the stack.
+    if (!bEndOfScope) {
+      BasicBlock *pClonedSucc = CloneNode(pSuccBB, BlockClones, RegionValueRemap);
+
+      if (bIfScope || bSwitchScope) {
+        // The top of the stack is this scope's re-pushed copy (step 5); record
+        // the successor there so that step 7 can recognize a repeated target.
+        ScopeStackItem *pParentScope = GetScope();
+        pParentScope->pPrevSuccBB = pSuccBB;
+        pParentScope->pClonedPrevSuccBB = pClonedSucc;
+      }
+
+      // Create new scope to process the successor.
+      ScopeStackItem &SuccScope = PushScope(pSuccBB);
+      SuccScope.pPrevSuccBB = nullptr;
+      SuccScope.pClonedPrevSuccBB = nullptr;
+      SuccScope.pClonedScopeBeginBB = pClonedSucc;
+      SuccScope.ScopeEndPoints = Scope.ScopeEndPoints;
+      SuccScope.DeltaScopeEndPoints = Scope.DeltaScopeEndPoints;
+      SuccScope.SwitchBreaks = Scope.SwitchBreaks;
+      AddEdge(Scope.pClonedScopeBeginBB, Scope.SuccIdx, SuccScope.pClonedScopeBeginBB, Edges);
+    }
+  }
+
+  // Fixup edges: apply the successor lists recorded by AddEdge to the terminators.
+  for (auto itEdge = Edges.begin(), endEdge = Edges.end(); itEdge != endEdge; ++itEdge) {
+    BasicBlock *pBB = itEdge->first;
+    vector<BasicBlock *> &Successors = itEdge->second;
+    TerminatorInst *pTI = pBB->getTerminator();
+    DXASSERT_NOMSG(Successors.size() == pTI->getNumSuccessors());
+
+    for (unsigned i = 0; i < pTI->getNumSuccessors(); ++i) {
+      pTI->setSuccessor(i, Successors[i]);
+    }
+  }
+}
+
+// Pushes a new stack item whose scope begins at pBB and returns a reference to
+// it. The item's kind is derived from pBB's terminator:
+//   branch with 1 successor  -> Fallthrough,
+//   branch with 2 successors -> If,
+//   return                   -> Return,
+//   switch                   -> Switch.
+// Any other terminator is rejected by an assert. All remaining fields of the
+// item are left as default-constructed for the caller to fill in.
+ScopeNestedCFG::ScopeStackItem &ScopeNestedCFG::PushScope(BasicBlock *pBB) {
+  ScopeStackItem NewItem;
+  NewItem.pScopeBeginBB = pBB;
+  TerminatorInst *pTI = pBB->getTerminator();
+
+  if (isa<BranchInst>(pTI)) {
+    DXASSERT_NOMSG(!IsLoopBackedge(pBB));
+    const unsigned NumSucc = pTI->getNumSuccessors();
+    if (NumSucc == 1) {
+      NewItem.ScopeKind = ScopeStackItem::Kind::Fallthrough;
+    } else if (NumSucc == 2) {
+      NewItem.ScopeKind = ScopeStackItem::Kind::If;
+    } else {
+      DXASSERT_NOMSG(false);
+    }
+  } else if (isa<ReturnInst>(pTI)) {
+    NewItem.ScopeKind = ScopeStackItem::Kind::Return;
+  } else if (isa<SwitchInst>(pTI)) {
+    NewItem.ScopeKind = ScopeStackItem::Kind::Switch;
+  } else {
+    DXASSERT_NOMSG(false);
+  }
+
+  m_ScopeStack.push_back(NewItem);
+  return *GetScope();
+}
+
+// Pushes a copy of Scope onto the scope stack and returns the result of
+// GetScope() for it, so the caller can modify the pushed copy in place.
+ScopeNestedCFG::ScopeStackItem &ScopeNestedCFG::RePushScope(const ScopeStackItem &Scope) {
+  m_ScopeStack.push_back(Scope);
+  ScopeStackItem *pPushed = GetScope();
+  return *pPushed;
+}
+
+// Returns the stack item that lies Idx positions below the top of the scope
+// stack (Idx == 0 is the top), or nullptr when the stack holds no such item.
+ScopeNestedCFG::ScopeStackItem *ScopeNestedCFG::GetScope(unsigned Idx) {
+  const size_t Depth = m_ScopeStack.size();
+  if (Idx >= Depth)
+    return nullptr;
+
+  return &m_ScopeStack[Depth - 1 - Idx];
+}
+
+// Searches the scope stack from the top downwards and returns the first scope
+// whose kind equals ScopeKind.
+//
+// When no scope of that kind is on the stack, DXC_E_SCOPE_NESTED_FAILED is
+// reported through IFT.
+// NOTE(review): IFT presumably throws here, which would make the trailing
+// return unreachable in practice - confirm against the IFT definition.
+ScopeNestedCFG::ScopeStackItem *ScopeNestedCFG::FindParentScope(ScopeStackItem::Kind ScopeKind) {
+  for (unsigned Depth = 0; Depth < m_ScopeStack.size(); ++Depth) {
+    ScopeStackItem *pCandidate = GetScope(Depth);
+    if (pCandidate->ScopeKind == ScopeKind)
+      return pCandidate;
+  }
+
+  IFT(DXC_E_SCOPE_NESTED_FAILED);
+  return nullptr;
+}
+
+// Removes the top-of-stack scope.
+//
+// The scope stack must not be empty: pop_back() on an empty container is
+// undefined behavior, so an unbalanced push/pop sequence is caught here in
+// debug builds instead of silently corrupting the stack.
+void ScopeNestedCFG::PopScope() {
+  DXASSERT_NOMSG(!m_ScopeStack.empty());
+  m_ScopeStack.pop_back();
+}
+
+// Records that successor slot SuccSlotIdx of pClonedSrcBB's terminator should
+// point at pDstBB.
+//
+// Edges maps a source block to a vector holding one entry per successor of
+// its terminator. The vector is created, sized from the terminator, the first
+// time the source block is seen; slots that have not been recorded yet stay
+// null. Nothing is rewritten in the IR here; only the Edges map is updated.
+//
+// pDstBB must be non-null and SuccSlotIdx must be a valid successor index of
+// the source terminator; both are checked with debug assertions.
+void ScopeNestedCFG::AddEdge(BasicBlock *pClonedSrcBB, unsigned SuccSlotIdx, BasicBlock *pDstBB,
+                             unordered_map<BasicBlock *, vector<BasicBlock *> > &Edges) {
+  DXASSERT_NOMSG(pDstBB != nullptr);
+
+  // Single lookup; insert a null-filled successor vector on first use.
+  auto it = Edges.find(pClonedSrcBB);
+  if (it == Edges.end()) {
+    TerminatorInst *pTI = pClonedSrcBB->getTerminator();
+    it = Edges.emplace(pClonedSrcBB, vector<BasicBlock *>(pTI->getNumSuccessors())).first;
+  }
+
+  vector<BasicBlock *> &Successors = it->second;
+  // operator[] is unchecked, so guard the slot index explicitly.
+  DXASSERT_NOMSG(SuccSlotIdx < Successors.size());
+  Successors[SuccSlotIdx] = pDstBB;
+}
+
+// Clones pBB (the clone's name gets NameSuffix) and rewrites the clone's
+// operands through RegionValueRemap.
+//
+// Every original->clone value mapping produced by this clone is first merged
+// into RegionValueRemap, overwriting any previous mapping for the same value.
+// Then each operand of each cloned instruction that has an entry in
+// RegionValueRemap is replaced by its mapped value.
+//
+// Returns the cloned block.
+BasicBlock *ScopeNestedCFG::CloneBasicBlockAndFixupValues(const BasicBlock *pBB,
+                                                          ValueToValueMapTy &RegionValueRemap,
+                                                          const Twine &NameSuffix) {
+  ValueToValueMapTy LocalMap;
+  BasicBlock *pNewBB = CloneBasicBlock(pBB, LocalMap, NameSuffix);
+
+  // Publish this clone's value mappings to the region-wide remap table.
+  for (auto itMap = LocalMap.begin(); itMap != LocalMap.end(); ++itMap) {
+    RegionValueRemap[itMap->first] = itMap->second;
+  }
+
+  // Redirect operands of the cloned instructions to their remapped values.
+  for (Instruction &Inst : *pNewBB) {
+    const unsigned NumOps = Inst.getNumOperands();
+    for (unsigned OpIdx = 0; OpIdx != NumOps; ++OpIdx) {
+      Value *pOld = Inst.getOperand(OpIdx);
+      auto itRemap = RegionValueRemap.find(pOld);
+      if (itRemap == RegionValueRemap.end())
+        continue;
+
+      Inst.replaceUsesOfWith(pOld, itRemap->second);
+    }
+  }
+
+  return pNewBB;
+}
+
+// Returns the block to use for this visit of pBB.
+//
+// On the first visit the original block itself is used: it is registered in
+// BlockClones as the first entry of its own clone list and returned unchanged.
+// On every later visit a real clone is created, appended to the clone list,
+// inserted into the function right after pBB, and returned.
+//
+// A fresh clone has all of its terminator successors pointed at itself as a
+// temporary placeholder.
+BasicBlock *ScopeNestedCFG::CloneNode(BasicBlock *pBB,
+                                      unordered_map<BasicBlock *, vector<BasicBlock *> > &BlockClones,
+                                      ValueToValueMapTy &RegionValueRemap) {
+  auto itClones = BlockClones.find(pBB);
+  if (itClones == BlockClones.end()) {
+    // First encounter: the original block stands in for itself.
+    BlockClones[pBB] = vector<BasicBlock *>(1, pBB);
+    return pBB;
+  }
+
+  // Seen before: make a real copy and place it next to the original.
+  BasicBlock *pNewBB = CloneBasicBlockAndFixupValues(pBB, RegionValueRemap);
+  itClones->second.emplace_back(pNewBB);
+  m_pFunc->getBasicBlockList().insertAfter(pBB, pNewBB);
+
+  // Placeholder successors: point every slot at the clone itself.
+  TerminatorInst *pNewTerm = pNewBB->getTerminator();
+  for (unsigned Slot = 0, NumSlots = pNewTerm->getNumSuccessors(); Slot < NumSlots; ++Slot) {
+    pNewTerm->setSuccessor(Slot, pNewBB);
+  }
+
+  return pNewBB;
+}
+
+// Clones the loop whose header is pHeaderBB and returns the header of the
+// clone.
+//
+//  - Every block reachable from the loop header (not the preheader) up to the
+//    loop exit is cloned via CloneLoopRec.
+//  - Edges between the cloned blocks are collected in Edges along the way.
+//  - The loop map gains an entry for the cloned loop (keyed by its header), so
+//    a cloned loop can itself be cloned later.
+//
+// pHeaderBB must already have an entry in the loop map (debug-asserted).
+// pClonedPreHeaderBB is stored as the cloned loop's preheader.
+BasicBlock *ScopeNestedCFG::CloneLoop(BasicBlock *pHeaderBB,
+                                      BasicBlock *pClonedPreHeaderBB,
+                                      unordered_map<BasicBlock *, vector<BasicBlock *> > &BlockClones,
+                                      unordered_map<BasicBlock *, vector<BasicBlock *> > &Edges,
+                                      ValueToValueMapTy &RegionValueRemap) {
+  auto itLoop = m_LoopMap.find(pHeaderBB);
+  DXASSERT_NOMSG(itLoop != m_LoopMap.end());
+  const LoopItem &OrigLoop = itLoop->second;
+
+  LoopItem NewLoop;
+  NewLoop.pLP = pClonedPreHeaderBB;
+
+  unordered_set<BasicBlock *> Seen;
+  CloneLoopRec(pHeaderBB, nullptr, 0, BlockClones, Edges, Seen, OrigLoop, NewLoop, RegionValueRemap);
+
+  // Register the clone under its own header.
+  BasicBlock *pNewHeader = NewLoop.pLB;
+  m_LoopMap[pNewHeader] = NewLoop;
+  return pNewHeader;
+}
+
+// Recursive worker for CloneLoop: depth-first walk over the successors of pBB
+// that clones each block once per walk and records the edges between clones.
+//
+//   pBB            - original block being visited.
+//   pClonePredBB   - clone of the predecessor we arrived from; nullptr for the
+//                    root call, in which case the clone of pBB is recorded as
+//                    the cloned loop's header (ClonedLI.pLB).
+//   ClonedPredIdx  - successor slot of pClonePredBB that leads to pBB.
+//   BlockClones    - per-original-block list of clones (see CloneNode).
+//   Edges          - collected clone-to-clone edges (see AddEdge).
+//   VisitedBlocks  - original blocks already handled during this walk.
+//   LI / ClonedLI  - the loop being cloned and the descriptor being filled in
+//                    for its clone (header, latch and exit are set here).
+//
+// Returns the block that represents pBB in the clone.
+BasicBlock *ScopeNestedCFG::CloneLoopRec(BasicBlock *pBB,
+                                  BasicBlock *pClonePredBB,
+                                  unsigned ClonedPredIdx,
+                                  unordered_map<BasicBlock *, vector<BasicBlock *> > &BlockClones,
+                                  unordered_map<BasicBlock *, vector<BasicBlock *> > &Edges,
+                                  unordered_set<BasicBlock *> &VisitedBlocks,
+                                  const LoopItem &LI,
+                                  LoopItem &ClonedLI,
+                                  ValueToValueMapTy &RegionValueRemap) {
+  auto itBB = VisitedBlocks.find(pBB);
+  if (itBB != VisitedBlocks.end()) {
+    // Already handled in this walk: reuse the last entry of its clone list.
+    BasicBlock *pClonedBB = *BlockClones[*itBB].rbegin();
+    // Clone the edge, but do not follow successors.
+    if (pClonePredBB != nullptr) {
+      AddEdge(pClonePredBB, ClonedPredIdx, pClonedBB, Edges);
+    }
+    return pClonedBB;
+  }
+  // Mark as visited before recursing so cycles terminate.
+  VisitedBlocks.insert(pBB);
+
+  // Clone myself.
+  BasicBlock *pClonedBB = CloneNode(pBB, BlockClones, RegionValueRemap);
+
+  // Add edge from the predecessor BB to myself.
+  if (pClonePredBB != nullptr) {
+    AddEdge(pClonePredBB, ClonedPredIdx, pClonedBB, Edges);
+  } else {
+    // Root call: this clone is the header of the cloned loop.
+    ClonedLI.pLB = pClonedBB;
+  }
+
+  // Loop exit?
+  if (pBB == LI.pLE) {
+    // The exit block ends the walk; its successors are not followed.
+    ClonedLI.pLE = pClonedBB;
+    return pClonedBB;
+  }
+
+  // Loop latch?
+  if (pBB == LI.pLL) {
+    ClonedLI.pLL = pClonedBB;
+    // Record the backedge: successor slot 0 of the cloned latch goes to the
+    // cloned header (already set by the root call).
+    AddEdge(ClonedLI.pLL, 0, ClonedLI.pLB, Edges);
+  }
+
+  // Process successors.
+  TerminatorInst *pTI = pBB->getTerminator();
+  BasicBlock *pPrevSuccBB = nullptr;
+  BasicBlock *pPrevClonedSuccBB = nullptr;
+  for (unsigned SuccIdx = 0; SuccIdx < pTI->getNumSuccessors(); SuccIdx++) {
+    BasicBlock *pSuccBB = pTI->getSuccessor(SuccIdx);
+    if (pSuccBB != pPrevSuccBB) {
+      pPrevClonedSuccBB = CloneLoopRec(pSuccBB, pClonedBB, SuccIdx, BlockClones, Edges, 
+                                       VisitedBlocks, LI, ClonedLI, RegionValueRemap);
+      pPrevSuccBB = pSuccBB;
+    } else {
+      // Same successor as the previous slot: reuse its clone instead of
+      // recursing again, and just record the edge for this slot.
+      AddEdge(pClonedBB, SuccIdx, pPrevClonedSuccBB, Edges);
+    }
+  }
+
+  return pClonedBB;
+}
+
+//-----------------------------------------------------------------------------
+// Utility functions.
+//-----------------------------------------------------------------------------
+// Returns true when pBB ends in a two-successor branch instruction.
+bool ScopeNestedCFG::IsIf(BasicBlock *pBB) {
+  TerminatorInst *pTerm = pBB->getTerminator();
+  return IsIf(pTerm);
+}
+
+// Returns true when pTI is a branch instruction with exactly two successors.
+bool ScopeNestedCFG::IsIf(TerminatorInst *pTI) {
+  if (pTI->getNumSuccessors() != 2)
+    return false;
+
+  return isa<BranchInst>(pTI);
+}
+
+// Returns true when pBB ends in a switch instruction.
+bool ScopeNestedCFG::IsSwitch(BasicBlock *pBB) {
+  TerminatorInst *pTerm = pBB->getTerminator();
+  return IsSwitch(pTerm);
+}
+
+// Returns true when pTI is a switch instruction.
+bool ScopeNestedCFG::IsSwitch(TerminatorInst *pTI) {
+  return isa<SwitchInst>(pTI);
+}
+
+// Returns the 1-bit integer constant 0 in the pass's LLVM context.
+Value *ScopeNestedCFG::GetFalse() {
+  IntegerType *pBoolTy = IntegerType::get(*m_pCtx, 1);
+  return Constant::getIntegerValue(pBoolTy, APInt(1, 0));
+}
+
+// Returns the 1-bit integer constant 1 in the pass's LLVM context.
+Value *ScopeNestedCFG::GetTrue() {
+  IntegerType *pBoolTy = IntegerType::get(*m_pCtx, 1);
+  return Constant::getIntegerValue(pBoolTy, APInt(1, 1));
+}
+
+// Returns a 32-bit integer constant holding v in the pass's LLVM context.
+ConstantInt *ScopeNestedCFG::GetI32Const(int v) {
+  const APInt Bits(32, v);
+  return ConstantInt::get(*m_pCtx, Bits);
+}
+
+// Writes the elements of Set to s in the set's iteration order, formatted as
+// "{ a b c }" (each element followed by a single space).
+void ScopeNestedCFG::DumpIntSet(raw_ostream &s, set<unsigned> Set) {
+  s << "{ ";
+  for (unsigned Elem : Set) {
+    s << Elem << " ";
+  }
+  s << "}";
+}
+
+}
+
+
+// Bring ScopeNestedCFG into scope for the registration macros and the factory
+// function below.
+using namespace ScopeNestedCFGNS;
+
+// Pass registration: the pass is registered under the argument name
+// "scopenested" with the description "Scope-nested CFG transformation", and
+// declares ReducibilityAnalysis and LoopInfoWrapperPass as dependencies.
+// NOTE(review): the two trailing 'false' arguments are presumably LLVM's
+// "CFG-only" and "is analysis" flags - confirm against PassSupport.h.
+INITIALIZE_PASS_BEGIN(ScopeNestedCFG, "scopenested", "Scope-nested CFG transformation", false, false)
+INITIALIZE_PASS_DEPENDENCY(ReducibilityAnalysis)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(ScopeNestedCFG, "scopenested", "Scope-nested CFG transformation", false, false)
+
+namespace llvm {
+
+// Factory for the scope-nested CFG pass. Returns a newly heap-allocated
+// ScopeNestedCFG instance; the caller receives the raw pointer.
+FunctionPass *createScopeNestedCFGPass() {
+  FunctionPass *pPass = new ScopeNestedCFG();
+  return pPass;
+}
+
+} // namespace llvm

+ 11 - 0
projects/dxilconv/lib/ShaderBinary/CMakeLists.txt

@@ -0,0 +1,11 @@
+# Build ShaderBinary.lib.
+
+# The D3D12 package is mandatory; configuration stops if it cannot be found.
+find_package(D3D12 REQUIRED)
+
+# Library target built from ShaderBinary.cpp.
+# NOTE(review): add_dxilconv_project_library is a project-defined helper that
+# is not visible here - confirm what target properties it sets.
+add_dxilconv_project_library(ShaderBinary
+  ShaderBinary.cpp
+)
+
+# Make the D3D12 headers located by find_package available to this directory.
+include_directories(
+    ${D3D12_INCLUDE_DIRS}
+)

+ 1246 - 0
projects/dxilconv/lib/ShaderBinary/ShaderBinary.cpp

@@ -0,0 +1,1246 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// ShaderBinary.cpp                                                          //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Shader binary format parsing and encoding.                                //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+// HLSL change start
+#include "ShaderBinaryIncludes.h"
+// HLSL change end
+
+/*==========================================================================;
+ *
+ *  D3D10ShaderBinary namespace
+ *
+ ***************************************************************************/
+
+namespace D3D10ShaderBinary
+{
+
+// Returns nonzero when OpCode is below D3D10_SB_NUM_OPCODES, i.e. when it can
+// be used as an index into the per-opcode instruction table.
+BOOL IsOpCodeValid(D3D10_SB_OPCODE_TYPE OpCode)
+{
+    const bool bInRange = (OpCode < D3D10_SB_NUM_OPCODES);
+    return bInRange;
+}
+
+// Returns the operand count stored in the instruction table for OpCode.
+// Throws E_FAIL when OpCode is not a valid opcode.
+UINT GetNumInstructionOperands(D3D10_SB_OPCODE_TYPE OpCode)
+{
+    if (!IsOpCodeValid(OpCode))
+    {
+        throw E_FAIL;
+    }
+
+    return g_InstructionInfo[OpCode].m_NumOperands;
+}
+
+// Per-opcode instruction table, indexed by opcode value. Entries are filled in
+// by InitInstructionInfo() and read by GetNumInstructionOperands().
+CInstructionInfo g_InstructionInfo[D3D10_SB_NUM_OPCODES];
+
+void InitInstructionInfo()
+{
+#define SET(OpCode, Name, NumOperands, PrecMask, OpClass) \
+    (g_InstructionInfo[OpCode].Set(NumOperands, Name, OpClass, PrecMask))
+
+    SET (D3D10_SB_OPCODE_ADD, "add",                                                 3, 0x06, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_AND, "and",                                                 3, 0x06, D3D10_SB_BIT_OP);
+    SET (D3D10_SB_OPCODE_BREAK, "break",                                             0, 0x00, D3D10_SB_FLOW_OP);
+    SET (D3D10_SB_OPCODE_BREAKC, "breakc",                                           1, 0x00, D3D10_SB_FLOW_OP);
+    SET (D3D10_SB_OPCODE_CALL, "call",                                               1, 0x00, D3D10_SB_FLOW_OP);
+    SET (D3D10_SB_OPCODE_CALLC, "callc",                                             2, 0x00, D3D10_SB_FLOW_OP);
+    SET (D3D10_SB_OPCODE_CONTINUE, "continue",                                       0, 0x00, D3D10_SB_FLOW_OP);
+    SET (D3D10_SB_OPCODE_CONTINUEC, "continuec",                                     1, 0x00, D3D10_SB_FLOW_OP);
+    SET (D3D10_SB_OPCODE_CASE, "case",                                               1, 0x00, D3D10_SB_FLOW_OP);
+    SET (D3D10_SB_OPCODE_CUT, "cut",                                                 0, 0x00, D3D10_SB_FLOW_OP);
+    SET (D3D10_SB_OPCODE_DEFAULT, "default",                                         0, 0x00, D3D10_SB_FLOW_OP);
+    SET (D3D10_SB_OPCODE_DISCARD, "discard",                                         1, 0x00, D3D10_SB_FLOW_OP);
+    SET (D3D10_SB_OPCODE_DIV, "div",                                                 3, 0x06, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_DP2, "dp2",                                                 3, 0x06, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_DP3, "dp3",                                                 3, 0x06, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_DP4, "dp4",                                                 3, 0x06, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_ELSE, "else",                                               0, 0x00, D3D10_SB_FLOW_OP);
+    SET (D3D10_SB_OPCODE_EMIT, "emit",                                               0, 0x00, D3D10_SB_FLOW_OP);
+    SET (D3D10_SB_OPCODE_EMITTHENCUT, "emit_then_cut",                               0, 0x00, D3D10_SB_FLOW_OP);
+    SET (D3D10_SB_OPCODE_ENDIF, "endif",                                             0, 0x00, D3D10_SB_FLOW_OP);
+    SET (D3D10_SB_OPCODE_ENDLOOP, "endloop",                                         0, 0x00, D3D10_SB_FLOW_OP);
+    SET (D3D10_SB_OPCODE_ENDSWITCH, "endswitch",                                     0, 0x00, D3D10_SB_FLOW_OP);
+    SET (D3D10_SB_OPCODE_EQ, "eq",                                                   3, 0x00, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_EXP, "exp",                                                 2, 0x02, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_FRC, "frc",                                                 2, 0x02, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_FTOI, "ftoi",                                               2, 0x00, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_FTOU, "ftou",                                               2, 0x00, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_GE, "ge",                                                   3, 0x00, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_DERIV_RTX, "deriv_rtx",                                     2, 0x02, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_DERIV_RTY, "deriv_rty",                                     2, 0x02, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_IADD, "iadd",                                               3, 0x06, D3D10_SB_INT_OP);
+    SET (D3D10_SB_OPCODE_IF, "if",                                                   1, 0x00, D3D10_SB_FLOW_OP);
+    SET (D3D10_SB_OPCODE_IEQ, "ieq",                                                 3, 0x00, D3D10_SB_INT_OP);
+    SET (D3D10_SB_OPCODE_IGE, "ige",                                                 3, 0x00, D3D10_SB_INT_OP);
+    SET (D3D10_SB_OPCODE_ILT, "ilt",                                                 3, 0x00, D3D10_SB_INT_OP);
+    SET (D3D10_SB_OPCODE_IMAD, "imad",                                               4, 0x0e, D3D10_SB_INT_OP);
+    SET (D3D10_SB_OPCODE_IMAX, "imax",                                               3, 0x06, D3D10_SB_INT_OP);
+    SET (D3D10_SB_OPCODE_IMIN, "imin",                                               3, 0x06, D3D10_SB_INT_OP);
+    SET (D3D10_SB_OPCODE_IMUL, "imul",                                               4, 0x0c, D3D10_SB_INT_OP);
+    SET (D3D10_SB_OPCODE_INE, "ine",                                                 3, 0x00, D3D10_SB_INT_OP);
+    SET (D3D10_SB_OPCODE_INEG, "ineg",                                               2, 0x02, D3D10_SB_INT_OP);
+    SET (D3D10_SB_OPCODE_ISHL, "ishl",                                               3, 0x02, D3D10_SB_INT_OP);
+    SET (D3D10_SB_OPCODE_ISHR, "ishr",                                               3, 0x02, D3D10_SB_INT_OP);
+    SET (D3D10_SB_OPCODE_ITOF, "itof",                                               2, 0x00, D3D10_SB_INT_OP);
+    SET (D3D10_SB_OPCODE_LABEL, "label",                                             1, 0x00, D3D10_SB_FLOW_OP);
+    SET (D3D10_SB_OPCODE_LD, "ld",                                                   3, 0x00, D3D10_SB_TEX_OP);
+    SET (D3D10_SB_OPCODE_LD_MS, "ldms",                                              4, 0x00, D3D10_SB_TEX_OP);
+    SET (D3D10_SB_OPCODE_LOG, "log",                                                 2, 0x02, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_LOOP, "loop",                                               0, 0x00, D3D10_SB_FLOW_OP);
+    SET (D3D10_SB_OPCODE_LT, "lt",                                                   3, 0x00, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_MAD, "mad",                                                 4, 0x0e, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_MAX, "max",                                                 3, 0x06, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_MIN, "min",                                                 3, 0x06, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_MOV, "mov",                                                 2, 0x02, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_MOVC, "movc",                                               4, 0x0c, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_MUL, "mul",                                                 3, 0x06, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_NE, "ne",                                                   3, 0x00, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_NOP, "nop",                                                 0, 0x00, D3D10_SB_FLOW_OP);
+    SET (D3D10_SB_OPCODE_NOT, "not",                                                 2, 0x02, D3D10_SB_BIT_OP);
+    SET (D3D10_SB_OPCODE_OR, "or",                                                   3, 0x06, D3D10_SB_BIT_OP);
+    SET (D3D10_SB_OPCODE_RESINFO, "resinfo",                                         3, 0x00, D3D10_SB_TEX_OP);
+    SET (D3D10_SB_OPCODE_RET, "ret",                                                 0, 0x00, D3D10_SB_FLOW_OP);
+    SET (D3D10_SB_OPCODE_RETC, "retc",                                               1, 0x00, D3D10_SB_FLOW_OP);
+    SET (D3D10_SB_OPCODE_ROUND_NE, "round_ne",                                       2, 0x02, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_ROUND_NI, "round_ni",                                       2, 0x02, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_ROUND_PI, "round_pi",                                       2, 0x02, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_ROUND_Z, "round_z",                                         2, 0x02, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_RSQ, "rsq",                                                 2, 0x02, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_SAMPLE, "sample",                                           4, 0x00, D3D10_SB_TEX_OP);
+    SET (D3D10_SB_OPCODE_SAMPLE_B, "sample_b",                                       5, 0x00, D3D10_SB_TEX_OP);
+    SET (D3D10_SB_OPCODE_SAMPLE_L, "sample_l",                                       5, 0x00, D3D10_SB_TEX_OP);
+    SET (D3D10_SB_OPCODE_SAMPLE_D, "sample_d",                                       6, 0x00, D3D10_SB_TEX_OP);
+    SET (D3D10_SB_OPCODE_SAMPLE_C, "sample_c",                                       5, 0x00, D3D10_SB_TEX_OP);
+    SET (D3D10_SB_OPCODE_SAMPLE_C_LZ, "sample_c_lz",                                 5, 0x00, D3D10_SB_TEX_OP);
+    SET (D3D10_SB_OPCODE_SQRT, "sqrt",                                               2, 0x02, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_SWITCH, "switch",                                           1, 0x00, D3D10_SB_FLOW_OP);
+    SET (D3D10_SB_OPCODE_SINCOS, "sincos",                                           3, 0x04, D3D10_SB_FLOAT_OP);
+    SET (D3D10_SB_OPCODE_UDIV, "udiv",                                               4, 0x0c, D3D10_SB_UINT_OP);
+    SET (D3D10_SB_OPCODE_ULT, "ult",                                                 3, 0x00, D3D10_SB_UINT_OP);
+    SET (D3D10_SB_OPCODE_UGE, "uge",                                                 3, 0x00, D3D10_SB_UINT_OP);
+    SET (D3D10_SB_OPCODE_UMAX, "umax",                                               3, 0x06, D3D10_SB_UINT_OP);
+    SET (D3D10_SB_OPCODE_UMIN, "umin",                                               3, 0x06, D3D10_SB_UINT_OP);
+    SET (D3D10_SB_OPCODE_UMUL, "umul",                                               4, 0x0c, D3D10_SB_UINT_OP);
+    SET (D3D10_SB_OPCODE_UMAD, "umad",                                               4, 0x0e, D3D10_SB_UINT_OP);
+    SET (D3D10_SB_OPCODE_USHR, "ushr",                                               3, 0x02, D3D10_SB_UINT_OP);
+    SET (D3D10_SB_OPCODE_UTOF, "utof",                                               2, 0x00, D3D10_SB_UINT_OP);
+    SET (D3D10_SB_OPCODE_XOR, "xor",                                                 3, 0x06, D3D10_SB_BIT_OP);
+    SET (D3D10_SB_OPCODE_RESERVED0, "jmp",                                           0, 0x00, D3D10_SB_FLOW_OP);
+    SET (D3D10_SB_OPCODE_DCL_INPUT, "dcl_input",                                     1, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D10_SB_OPCODE_DCL_OUTPUT, "dcl_output",                                   1, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D10_SB_OPCODE_DCL_INPUT_SGV, "dcl_input_sgv",                             1, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D10_SB_OPCODE_DCL_INPUT_PS_SGV, "dcl_input_ps_sgv",                       1, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE, "dcl_inputprimitive",               0, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY, "dcl_outputtopology",     0, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT, "dcl_maxout",                  0, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D10_SB_OPCODE_DCL_INPUT_PS, "dcl_input_ps",                               1, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER, "dcl_constantbuffer",                  1, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D10_SB_OPCODE_DCL_SAMPLER, "dcl_sampler",                                 1, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D10_SB_OPCODE_DCL_RESOURCE, "dcl_resource",                               1, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D10_SB_OPCODE_DCL_INPUT_SIV, "dcl_input_siv",                             1, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D10_SB_OPCODE_DCL_INPUT_PS_SIV, "dcl_input_ps_siv",                       1, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D10_SB_OPCODE_DCL_OUTPUT_SIV, "dcl_output_siv",                           1, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D10_SB_OPCODE_DCL_OUTPUT_SGV, "dcl_output_sgv",                           1, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D10_SB_OPCODE_DCL_TEMPS, "dcl_temps",                                     0, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP, "dcl_indexableTemp",                    0, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D10_SB_OPCODE_DCL_INDEX_RANGE, "dcl_indexrange",                          1, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS, "dcl_globalFlags",                        0, 0x00, D3D10_SB_DCL_OP);
+
+    SET (D3D10_1_SB_OPCODE_SAMPLE_INFO, "sampleinfo",                                2, 0x00, D3D10_SB_TEX_OP);
+    SET (D3D10_1_SB_OPCODE_SAMPLE_POS, "samplepos",                                  3, 0x00, D3D10_SB_TEX_OP);
+    SET (D3D10_1_SB_OPCODE_GATHER4, "gather4",                                       4, 0x00, D3D10_SB_TEX_OP);
+    SET (D3D10_1_SB_OPCODE_LOD, "lod",                                               4, 0x00, D3D10_SB_TEX_OP);
+
+    SET (D3D11_SB_OPCODE_EMIT_STREAM, "emit_stream",                                 1, 0x00, D3D10_SB_FLOW_OP);
+    SET (D3D11_SB_OPCODE_CUT_STREAM, "cut_stream",                                   1, 0x00, D3D10_SB_FLOW_OP);
+    SET (D3D11_SB_OPCODE_EMITTHENCUT_STREAM, "emit_then_cut_stream",                 1, 0x00, D3D10_SB_FLOW_OP);
+    SET (D3D11_SB_OPCODE_INTERFACE_CALL, "fcall",                                    1, 0x00, D3D10_SB_FLOW_OP);
+
+    SET (D3D11_SB_OPCODE_DCL_STREAM, "dcl_stream",                                   1, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D11_SB_OPCODE_DCL_FUNCTION_BODY, "dcl_function_body",                     0, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D11_SB_OPCODE_DCL_FUNCTION_TABLE, "dcl_function_table",                   0, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D11_SB_OPCODE_DCL_INTERFACE, "dcl_interface",                             0, 0x00, D3D10_SB_DCL_OP);
+
+    SET (D3D11_SB_OPCODE_BUFINFO, "bufinfo",                                         2, 0x00, D3D10_SB_TEX_OP);
+    SET (D3D11_SB_OPCODE_DERIV_RTX_COARSE, "deriv_rtx_coarse",                       2, 0x02, D3D10_SB_FLOAT_OP);
+    SET (D3D11_SB_OPCODE_DERIV_RTX_FINE, "deriv_rtx_fine",                           2, 0x02, D3D10_SB_FLOAT_OP);
+    SET (D3D11_SB_OPCODE_DERIV_RTY_COARSE, "deriv_rty_coarse",                       2, 0x02, D3D10_SB_FLOAT_OP);
+    SET (D3D11_SB_OPCODE_DERIV_RTY_FINE, "deriv_rty_fine",                           2, 0x02, D3D10_SB_FLOAT_OP);
+    SET (D3D11_SB_OPCODE_GATHER4_C, "gather4_c",                                     5, 0x00, D3D10_SB_TEX_OP);
+    SET (D3D11_SB_OPCODE_GATHER4_PO, "gather4_po",                                   5, 0x00, D3D10_SB_TEX_OP);
+    SET (D3D11_SB_OPCODE_GATHER4_PO_C, "gather4_po_c",                               6, 0x00, D3D10_SB_TEX_OP);
+    SET (D3D11_SB_OPCODE_RCP, "rcp",                                                 2, 0x02, D3D10_SB_FLOAT_OP);
+    SET (D3D11_SB_OPCODE_F32TOF16, "f32tof16",                                       2, 0x00, D3D10_SB_FLOAT_OP);
+    SET (D3D11_SB_OPCODE_F16TOF32, "f16tof32",                                       2, 0x00, D3D10_SB_FLOAT_OP);
+    SET (D3D11_SB_OPCODE_UADDC, "uaddc",                                             4, 0x0c, D3D10_SB_UINT_OP);
+    SET (D3D11_SB_OPCODE_USUBB, "usubb",                                             4, 0x0c, D3D10_SB_UINT_OP);
+    SET (D3D11_SB_OPCODE_COUNTBITS, "countbits",                                     2, 0x02, D3D10_SB_BIT_OP);
+    SET (D3D11_SB_OPCODE_FIRSTBIT_HI, "firstbit_hi",                                 2, 0x02, D3D10_SB_BIT_OP);
+    SET (D3D11_SB_OPCODE_FIRSTBIT_LO, "firstbit_lo",                                 2, 0x02, D3D10_SB_BIT_OP);
+    SET (D3D11_SB_OPCODE_FIRSTBIT_SHI, "firstbit_shi",                               2, 0x02, D3D10_SB_BIT_OP);
+    SET (D3D11_SB_OPCODE_UBFE, "ubfe",                                               4, 0x02, D3D10_SB_BIT_OP);
+    SET (D3D11_SB_OPCODE_IBFE, "ibfe",                                               4, 0x02, D3D10_SB_BIT_OP);
+    SET (D3D11_SB_OPCODE_BFI, "bfi",                                                 5, 0x02, D3D10_SB_BIT_OP);
+    SET (D3D11_SB_OPCODE_BFREV, "bfrev",                                             2, 0x02, D3D10_SB_BIT_OP);
+    SET (D3D11_SB_OPCODE_SWAPC, "swapc",                                             5, 0x02, D3D10_SB_FLOAT_OP);
+
+    SET (D3D11_SB_OPCODE_HS_DECLS, "hs_decls",                                       0, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D11_SB_OPCODE_HS_CONTROL_POINT_PHASE, "hs_control_point_phase",           0, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D11_SB_OPCODE_HS_FORK_PHASE, "hs_fork_phase",                             0, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D11_SB_OPCODE_HS_JOIN_PHASE, "hs_join_phase",                             0, 0x00, D3D10_SB_DCL_OP);
+
+    SET (D3D11_SB_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT, "dcl_input_control_point_count",   0, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D11_SB_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT, "dcl_output_control_point_count", 0, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D11_SB_OPCODE_DCL_TESS_DOMAIN, "dcl_tessellator_domain",                        0, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D11_SB_OPCODE_DCL_TESS_PARTITIONING, "dcl_tessellator_partitioning",            0, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D11_SB_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE, "dcl_tessellator_output_primitive",    0, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D11_SB_OPCODE_DCL_HS_MAX_TESSFACTOR, "dcl_hs_max_tessfactor",                   0, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D11_SB_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT, "dcl_hs_fork_phase_instance_count", 0, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D11_SB_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, "dcl_hs_join_phase_instance_count", 0, 0x00, D3D10_SB_DCL_OP);
+
+    SET (D3D11_SB_OPCODE_DCL_THREAD_GROUP, "dcl_thread_group",                       0, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED, "dcl_uav_typed",           1, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW, "dcl_uav_raw",               1, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED, "dcl_uav_structured", 1, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW, "dcl_tgsm_raw",         1, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED, "dcl_tgsm_structured", 1, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D11_SB_OPCODE_DCL_RESOURCE_RAW, "dcl_resource_raw",                       1, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D11_SB_OPCODE_DCL_RESOURCE_STRUCTURED, "dcl_resource_structured",         1, 0x00, D3D10_SB_DCL_OP);
+    SET (D3D11_SB_OPCODE_LD_UAV_TYPED, "ld_uav_typed",                               3, 0x00, D3D11_SB_MEM_OP);    
+    SET (D3D11_SB_OPCODE_STORE_UAV_TYPED, "store_uav_typed",                         3, 0x00, D3D11_SB_MEM_OP);
+    SET (D3D11_SB_OPCODE_LD_RAW, "ld_raw",                                           3, 0x00, D3D11_SB_MEM_OP);
+    SET (D3D11_SB_OPCODE_STORE_RAW, "store_raw",                                     3, 0x00, D3D11_SB_MEM_OP);
+    SET (D3D11_SB_OPCODE_LD_STRUCTURED, "ld_structured",                             4, 0x00, D3D11_SB_MEM_OP);
+    SET (D3D11_SB_OPCODE_STORE_STRUCTURED, "store_structured",                       4, 0x00, D3D11_SB_MEM_OP);
+    SET (D3D11_SB_OPCODE_ATOMIC_AND, "atomic_and",                                   3, 0x00, D3D11_SB_ATOMIC_OP);
+    SET (D3D11_SB_OPCODE_ATOMIC_OR, "atomic_or",                                     3, 0x00, D3D11_SB_ATOMIC_OP);
+    SET (D3D11_SB_OPCODE_ATOMIC_XOR, "atomic_xor",                                   3, 0x00, D3D11_SB_ATOMIC_OP);
+    SET (D3D11_SB_OPCODE_ATOMIC_CMP_STORE, "atomic_cmp_store",                       4, 0x00, D3D11_SB_ATOMIC_OP);
+    SET (D3D11_SB_OPCODE_ATOMIC_IADD, "atomic_iadd",                                 3, 0x00, D3D11_SB_ATOMIC_OP); 
+    SET (D3D11_SB_OPCODE_ATOMIC_IMAX, "atomic_imax",                                 3, 0x00, D3D11_SB_ATOMIC_OP);
+    SET (D3D11_SB_OPCODE_ATOMIC_IMIN, "atomic_imin",                                 3, 0x00, D3D11_SB_ATOMIC_OP);
+    SET (D3D11_SB_OPCODE_ATOMIC_UMAX, "atomic_umax",                                 3, 0x00, D3D11_SB_ATOMIC_OP);
+    SET (D3D11_SB_OPCODE_ATOMIC_UMIN, "atomic_umin",                                 3, 0x00, D3D11_SB_ATOMIC_OP);
+    SET (D3D11_SB_OPCODE_IMM_ATOMIC_ALLOC, "imm_atomic_alloc",                       2, 0x00, D3D11_SB_ATOMIC_OP);
+    SET (D3D11_SB_OPCODE_IMM_ATOMIC_CONSUME, "imm_atomic_consume",                   2, 0x00, D3D11_SB_ATOMIC_OP);
+    SET (D3D11_SB_OPCODE_IMM_ATOMIC_IADD, "imm_atomic_iadd",                         4, 0x00, D3D11_SB_ATOMIC_OP);  
+    SET (D3D11_SB_OPCODE_IMM_ATOMIC_AND, "imm_atomic_and",                           4, 0x00, D3D11_SB_ATOMIC_OP);
+    SET (D3D11_SB_OPCODE_IMM_ATOMIC_OR, "imm_atomic_or",                             4, 0x00, D3D11_SB_ATOMIC_OP);  
+    SET (D3D11_SB_OPCODE_IMM_ATOMIC_XOR, "imm_atomic_xor",                           4, 0x00, D3D11_SB_ATOMIC_OP);  
+    SET (D3D11_SB_OPCODE_IMM_ATOMIC_EXCH, "imm_atomic_exch",                         4, 0x00, D3D11_SB_ATOMIC_OP);
+    SET (D3D11_SB_OPCODE_IMM_ATOMIC_CMP_EXCH, "imm_atomic_cmp_exch",                 5, 0x00, D3D11_SB_ATOMIC_OP);
+    SET (D3D11_SB_OPCODE_IMM_ATOMIC_IMAX, "imm_atomic_imax",                         4, 0x00, D3D11_SB_ATOMIC_OP);
+    SET (D3D11_SB_OPCODE_IMM_ATOMIC_IMIN, "imm_atomic_imin",                         4, 0x00, D3D11_SB_ATOMIC_OP);
+    SET (D3D11_SB_OPCODE_IMM_ATOMIC_UMAX, "imm_atomic_umax",                         4, 0x00, D3D11_SB_ATOMIC_OP);
+    SET (D3D11_SB_OPCODE_IMM_ATOMIC_UMIN, "imm_atomic_umin",                         4, 0x00, D3D11_SB_ATOMIC_OP);
+    SET (D3D11_SB_OPCODE_SYNC, "sync",                                               0, 0x00, D3D10_SB_FLOW_OP);
+    SET (D3D11_SB_OPCODE_EVAL_SNAPPED, "eval_snapped",                               3, 0x02, D3D10_SB_FLOAT_OP);
+    SET (D3D11_SB_OPCODE_EVAL_SAMPLE_INDEX, "eval_sample_index",                     3, 0x02, D3D10_SB_FLOAT_OP);
+    SET (D3D11_SB_OPCODE_EVAL_CENTROID, "eval_centroid",                             2, 0x02, D3D10_SB_FLOAT_OP);
+
+    SET (D3D11_SB_OPCODE_DCL_GS_INSTANCE_COUNT, "dcl_gsinstances",                   0, 0x00, D3D10_SB_DCL_OP);
+
+    SET (D3D11_SB_OPCODE_DADD, "dadd",                                               3, 0x06, D3D11_SB_DOUBLE_OP);
+    SET (D3D11_SB_OPCODE_DMAX, "dmax",                                               3, 0x06, D3D11_SB_DOUBLE_OP);
+    SET (D3D11_SB_OPCODE_DMIN, "dmin",                                               3, 0x06, D3D11_SB_DOUBLE_OP);
+    SET (D3D11_SB_OPCODE_DMUL, "dmul",                                               3, 0x06, D3D11_SB_DOUBLE_OP);
+    SET (D3D11_SB_OPCODE_DEQ, "deq",                                                 3, 0x00, D3D11_SB_DOUBLE_OP);
+    SET (D3D11_SB_OPCODE_DGE, "dge",                                                 3, 0x00, D3D11_SB_DOUBLE_OP);
+    SET (D3D11_SB_OPCODE_DLT, "dlt",                                                 3, 0x00, D3D11_SB_DOUBLE_OP);
+    SET (D3D11_SB_OPCODE_DNE, "dne",                                                 3, 0x00, D3D11_SB_DOUBLE_OP);
+    SET (D3D11_SB_OPCODE_DMOV, "dmov",                                               2, 0x02, D3D11_SB_DOUBLE_OP);
+    SET (D3D11_SB_OPCODE_DMOVC, "dmovc",                                             4, 0x0c, D3D11_SB_DOUBLE_OP);
+    SET (D3D11_SB_OPCODE_DTOF, "dtof",                                               2, 0x02, D3D11_SB_DOUBLE_TO_FLOAT_OP);
+    SET (D3D11_SB_OPCODE_FTOD, "ftod",                                               2, 0x00, D3D11_SB_FLOAT_TO_DOUBLE_OP);
+
+    SET (D3D11_SB_OPCODE_ABORT, "abort",                                             0, 0x00, D3D11_SB_DEBUG_OP);
+    SET (D3D11_SB_OPCODE_DEBUG_BREAK, "debug_break",                                 0, 0x00, D3D11_SB_DEBUG_OP);
+
+    SET (D3D11_1_SB_OPCODE_DDIV, "ddiv",                                             3, 0x06, D3D11_SB_DOUBLE_OP);
+    SET (D3D11_1_SB_OPCODE_DFMA, "dfma",                                             4, 0x0e, D3D11_SB_DOUBLE_OP);
+    SET (D3D11_1_SB_OPCODE_DRCP, "drcp",                                             2, 0x02, D3D11_SB_DOUBLE_OP);
+
+    SET (D3D11_1_SB_OPCODE_MSAD, "msad",                                             4, 0x0e, D3D10_SB_UINT_OP);
+
+    SET (D3D11_1_SB_OPCODE_DTOI, "dtoi",                                             2, 0x00, D3D11_SB_DOUBLE_OP);
+    SET (D3D11_1_SB_OPCODE_DTOU, "dtou",                                             2, 0x00, D3D11_SB_DOUBLE_OP);
+    SET (D3D11_1_SB_OPCODE_ITOD, "itod",                                             2, 0x00, D3D10_SB_INT_OP);
+    SET (D3D11_1_SB_OPCODE_UTOD, "utod",                                             2, 0x00, D3D10_SB_UINT_OP);
+
+    SET (D3DWDDM1_3_SB_OPCODE_GATHER4_FEEDBACK,"gather4_s",                          5, 0x00, D3D10_SB_TEX_OP);
+    SET (D3DWDDM1_3_SB_OPCODE_GATHER4_C_FEEDBACK,"gather4_c_s",                      6, 0x00, D3D10_SB_TEX_OP);
+    SET (D3DWDDM1_3_SB_OPCODE_GATHER4_PO_FEEDBACK,"gather4_po_s",                    6, 0x00, D3D10_SB_TEX_OP);
+    SET (D3DWDDM1_3_SB_OPCODE_GATHER4_PO_C_FEEDBACK,"gather4_po_c_s",                7, 0x00, D3D10_SB_TEX_OP);
+    SET (D3DWDDM1_3_SB_OPCODE_LD_FEEDBACK,"ld_s",                                    4, 0x00, D3D10_SB_TEX_OP);
+    SET (D3DWDDM1_3_SB_OPCODE_LD_MS_FEEDBACK,"ldms_s",                               5, 0x00, D3D10_SB_TEX_OP);
+    SET (D3DWDDM1_3_SB_OPCODE_LD_UAV_TYPED_FEEDBACK,"ld_uav_typed_s",                4, 0x00, D3D11_SB_MEM_OP);
+    SET (D3DWDDM1_3_SB_OPCODE_LD_RAW_FEEDBACK,"ld_raw_s",                            4, 0x00, D3D11_SB_MEM_OP);
+    SET (D3DWDDM1_3_SB_OPCODE_LD_STRUCTURED_FEEDBACK,"ld_structured_s",              5, 0x00, D3D11_SB_MEM_OP);
+    SET (D3DWDDM1_3_SB_OPCODE_SAMPLE_L_FEEDBACK,"sample_l_s",                        6, 0x00, D3D10_SB_TEX_OP);
+    SET (D3DWDDM1_3_SB_OPCODE_SAMPLE_C_LZ_FEEDBACK,"sample_c_lz_s",                  6, 0x00, D3D10_SB_TEX_OP);
+    SET (D3DWDDM1_3_SB_OPCODE_SAMPLE_CLAMP_FEEDBACK, "sample_cl_s",                  6, 0x00, D3D10_SB_TEX_OP);
+    SET (D3DWDDM1_3_SB_OPCODE_SAMPLE_B_CLAMP_FEEDBACK, "sample_b_cl_s",              7, 0x00, D3D10_SB_TEX_OP);
+    SET (D3DWDDM1_3_SB_OPCODE_SAMPLE_D_CLAMP_FEEDBACK,"sample_d_cl_s",               8, 0x00, D3D10_SB_TEX_OP);
+    SET (D3DWDDM1_3_SB_OPCODE_SAMPLE_C_CLAMP_FEEDBACK,"sample_c_cl_s",               7, 0x00, D3D10_SB_TEX_OP);
+    SET (D3DWDDM1_3_SB_OPCODE_CHECK_ACCESS_FULLY_MAPPED, "check_access_fully_mapped",2, 0x00, D3D10_SB_TEX_OP);
+}
+
+//*****************************************************************************
+//
+//  CShaderCodeParser
+//
+//*****************************************************************************
+
+// Attaches the parser to the token buffer pBuffer.
+//   token [0] : version token (decoded by ShaderType / Shader*Version)
+//   token [1] : shader length in tokens, used here to locate the end pointer
+//   token [2] : first opcode token; parsing starts here
+void CShaderCodeParser::SetShader(CONST CShaderToken* pBuffer)
+{
+    // The parser stores non-const pointers, so constness is dropped once here.
+    CShaderToken* const pTokens = (CShaderToken*)pBuffer;
+
+    m_pShaderCode = pTokens;
+    m_pShaderEndToken = pTokens + pTokens[1];
+    m_pCurrentToken = pTokens + 2;
+}
+
+// Returns the program type decoded from the first token of the shader.
+D3D10_SB_TOKENIZED_PROGRAM_TYPE CShaderCodeParser::ShaderType()
+{
+    const CShaderToken VersionToken = m_pShaderCode[0];
+    return static_cast<D3D10_SB_TOKENIZED_PROGRAM_TYPE>(DECODE_D3D10_SB_TOKENIZED_PROGRAM_TYPE(VersionToken));
+}
+
+// Returns the read position as a token count measured from the start of
+// the shader buffer (the inverse of SetCurrentTokenOffset).
+UINT CShaderCodeParser::CurrentTokenOffset()
+{
+    return static_cast<UINT>(m_pCurrentToken - m_pShaderCode);
+}
+
+// Moves the read position to the token located Offset tokens from the start
+// of the shader buffer. No range check is performed against the end token.
+void CShaderCodeParser::SetCurrentTokenOffset(UINT Offset)
+{
+    m_pCurrentToken = &m_pShaderCode[Offset];
+}
+
+// Returns the shader length in tokens, which is stored verbatim in the
+// second token of the buffer.
+UINT CShaderCodeParser::ShaderLengthInTokens()
+{
+    const CShaderToken* const pLengthToken = m_pShaderCode + 1;
+    return *pLengthToken;
+}
+
+// Returns the minor version decoded from the first token of the shader.
+UINT CShaderCodeParser::ShaderMinorVersion()
+{
+    const CShaderToken VersionToken = *m_pShaderCode;
+    return DECODE_D3D10_SB_TOKENIZED_PROGRAM_MINOR_VERSION(VersionToken);
+}
+
+// Returns the major version decoded from the first token of the shader.
+UINT CShaderCodeParser::ShaderMajorVersion()
+{
+    const CShaderToken VersionToken = *m_pShaderCode;
+    return DECODE_D3D10_SB_TOKENIZED_PROGRAM_MAJOR_VERSION(VersionToken);
+}
+
+// Decodes one operand index of representation IndexType from the token
+// stream into *pOperandIndex, advancing m_pCurrentToken past every token
+// consumed.
+//
+// Handled representations:
+//   IMMEDIATE32               - one token stored as the register index
+//   IMMEDIATE64               - two tokens stored in m_RegIndexA[0..1]
+//   RELATIVE                  - a nested operand describing the index register
+//   IMMEDIATE32_PLUS_RELATIVE - one immediate token followed by a nested operand
+//
+// Throws E_FAIL for any other representation.
+void CShaderCodeParser::ParseIndex(COperandIndex* pOperandIndex, D3D10_SB_OPERAND_INDEX_REPRESENTATION IndexType)
+{
+    // Purely immediate forms: the value sits inline in the token stream.
+    if (IndexType == D3D10_SB_OPERAND_INDEX_IMMEDIATE32)
+    {
+        pOperandIndex->m_RegIndex = *m_pCurrentToken++;
+        pOperandIndex->m_ComponentName = D3D10_SB_4_COMPONENT_X;
+        pOperandIndex->m_RelRegType = D3D10_SB_OPERAND_TYPE_IMMEDIATE32;
+        return;
+    }
+    if (IndexType == D3D10_SB_OPERAND_INDEX_IMMEDIATE64)
+    {
+        pOperandIndex->m_RegIndexA[0] = *m_pCurrentToken++;
+        pOperandIndex->m_RegIndexA[1] = *m_pCurrentToken++;
+        pOperandIndex->m_ComponentName = D3D10_SB_4_COMPONENT_X;
+        pOperandIndex->m_RelRegType = D3D10_SB_OPERAND_TYPE_IMMEDIATE64;
+        return;
+    }
+
+    // Everything that remains must carry a relative (register) part.
+    const bool bImmediatePlusRelative = (IndexType == D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE);
+    if (!bImmediatePlusRelative && IndexType != D3D10_SB_OPERAND_INDEX_RELATIVE)
+    {
+        throw E_FAIL;
+    }
+
+    // The immediate part, when present, precedes the nested operand.
+    if (bImmediatePlusRelative)
+    {
+        pOperandIndex->m_RegIndex = *m_pCurrentToken++;
+    }
+
+    // The relative part is encoded as a complete operand; copy out the
+    // fields that describe the register used for indexing.
+    COperand RelativeOperand;
+    ParseOperand(&RelativeOperand);
+    pOperandIndex->m_RelIndex = RelativeOperand.m_Index[0].m_RegIndex;
+    pOperandIndex->m_RelIndex1 = RelativeOperand.m_Index[1].m_RegIndex;
+    pOperandIndex->m_RelRegType = RelativeOperand.m_Type;
+    pOperandIndex->m_IndexDimension = RelativeOperand.m_IndexDimension;
+    pOperandIndex->m_ComponentName = RelativeOperand.m_ComponentName;
+    pOperandIndex->m_MinPrecision = RelativeOperand.m_MinPrecision;
+}
+
+// Decodes one operand starting at m_pCurrentToken into *pOperand and leaves
+// m_pCurrentToken just past everything consumed.
+//
+// Tokens are consumed in this order:
+//   1. the operand token (type, component count, extended flag, and for
+//      non-immediate operands the component selection and index layout)
+//   2. one extended operand token, if the extended flag is set
+//   3. the inline values, for IMMEDIATE32 / IMMEDIATE64 operands
+//   4. the indices, one ParseIndex call per index dimension
+//
+// For immediate operands m_IndexDimension is not written here; step 4 reads
+// whatever value *pOperand already holds.
+//
+// Throws E_FAIL on an unrecognised 4-component selection mode, and
+// propagates anything thrown by ParseIndex.
+void CShaderCodeParser::ParseOperand(COperandBase* pOperand)
+{
+    const CShaderToken OperandToken = *m_pCurrentToken++;
+
+    pOperand->m_Type = DECODE_D3D10_SB_OPERAND_TYPE(OperandToken);
+    pOperand->m_NumComponents = DECODE_D3D10_SB_OPERAND_NUM_COMPONENTS(OperandToken);
+    pOperand->m_bExtendedOperand = DECODE_IS_D3D10_SB_OPERAND_EXTENDED(OperandToken);
+
+    const bool bImmediate = (pOperand->m_Type == D3D10_SB_OPERAND_TYPE_IMMEDIATE32) ||
+                            (pOperand->m_Type == D3D10_SB_OPERAND_TYPE_IMMEDIATE64);
+
+    // Number of inline value tokens an immediate operand carries; stays 0
+    // for any component count other than 1 or 4.
+    UINT NumImmediateValues = 0;
+    if (pOperand->m_NumComponents == D3D10_SB_OPERAND_1_COMPONENT)
+    {
+        NumImmediateValues = 1;
+    }
+    else if (pOperand->m_NumComponents == D3D10_SB_OPERAND_4_COMPONENT)
+    {
+        NumImmediateValues = 4;
+    }
+
+    if (!bImmediate)
+    {
+        if (pOperand->m_NumComponents == D3D10_SB_OPERAND_4_COMPONENT)
+        {
+            // Component selection mode
+            pOperand->m_ComponentSelection = DECODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(OperandToken);
+            switch (pOperand->m_ComponentSelection)
+            {
+            case D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE:
+                pOperand->m_WriteMask = DECODE_D3D10_SB_OPERAND_4_COMPONENT_MASK(OperandToken);
+                break;
+
+            case D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE:
+                pOperand->m_Swizzle[0] = DECODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_SOURCE(OperandToken, 0);
+                pOperand->m_Swizzle[1] = DECODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_SOURCE(OperandToken, 1);
+                pOperand->m_Swizzle[2] = DECODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_SOURCE(OperandToken, 2);
+                pOperand->m_Swizzle[3] = DECODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_SOURCE(OperandToken, 3);
+                break;
+
+            case D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE:
+                {
+                    // A single selected component is replicated into every
+                    // swizzle slot and also recorded by name.
+                    const D3D10_SB_4_COMPONENT_NAME Selected = DECODE_D3D10_SB_OPERAND_4_COMPONENT_SELECT_1(OperandToken);
+                    for (UINT Slot = 0; Slot < 4; Slot++)
+                    {
+                        pOperand->m_Swizzle[Slot] = static_cast<BYTE>(Selected);
+                    }
+                    pOperand->m_ComponentName = Selected;
+                }
+                break;
+
+            default:
+                throw E_FAIL;
+            }
+        }
+
+        // Index dimension and the representation of each index.
+        pOperand->m_IndexDimension = DECODE_D3D10_SB_OPERAND_INDEX_DIMENSION(OperandToken);
+        if (pOperand->m_IndexDimension != D3D10_SB_OPERAND_INDEX_0D)
+        {
+            const UINT IndexCount = pOperand->m_IndexDimension;
+            for (UINT Dim = 0; Dim < IndexCount; Dim++)
+            {
+                pOperand->m_IndexType[Dim] = DECODE_D3D10_SB_OPERAND_INDEX_REPRESENTATION(Dim, OperandToken);
+            }
+        }
+    }
+
+    // Extended operand: exactly one extra token is consumed.
+    if (pOperand->m_bExtendedOperand)
+    {
+        const CShaderToken ExtendedToken = *m_pCurrentToken++;
+        pOperand->m_ExtendedOperandType = DECODE_D3D10_SB_EXTENDED_OPERAND_TYPE(ExtendedToken);
+        if (pOperand->m_ExtendedOperandType == D3D10_SB_EXTENDED_OPERAND_MODIFIER)
+        {
+            pOperand->m_Modifier = DECODE_D3D10_SB_OPERAND_MODIFIER(ExtendedToken);
+            pOperand->m_MinPrecision = DECODE_D3D11_SB_OPERAND_MIN_PRECISION(ExtendedToken);
+            pOperand->m_Nonuniform = DECODE_D3D12_SB_OPERAND_NON_UNIFORM(ExtendedToken);
+        }
+    }
+
+    // Inline values of an immediate operand follow the (optional) extended token.
+    if (bImmediate)
+    {
+        for (UINT Component = 0; Component < NumImmediateValues; Component++)
+        {
+            pOperand->m_Value[Component] = *m_pCurrentToken++;
+        }
+    }
+
+    // Operand indices
+    if (pOperand->m_IndexDimension != D3D10_SB_OPERAND_INDEX_0D)
+    {
+        const UINT IndexCount = pOperand->m_IndexDimension;
+        for (UINT Dim = 0; Dim < IndexCount; Dim++)
+        {
+            ParseIndex(&pOperand->m_Index[Dim], pOperand->m_IndexType[Dim]);
+        }
+    }
+}
+
+void CShaderCodeParser::ParseInstruction(CInstruction* pInstruction)
+{
+    pInstruction->Clear(true);
+    CShaderToken* pStart = m_pCurrentToken;
+    CShaderToken Token = *m_pCurrentToken++;
+    pInstruction->m_OpCode = DECODE_D3D10_SB_OPCODE_TYPE(Token);
+    pInstruction->m_PreciseMask = DECODE_D3D11_SB_INSTRUCTION_PRECISE_VALUES(Token); 
+    pInstruction->m_bSaturate = DECODE_IS_D3D10_SB_INSTRUCTION_SATURATE_ENABLED(Token); 
+    UINT InstructionLength = DECODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(Token);
+    pInstruction->m_NumOperands = GetNumInstructionOperands(pInstruction->m_OpCode);
+    BOOL b51PlusShader = (ShaderMajorVersion() > 5 || (ShaderMajorVersion() == 5 && ShaderMinorVersion() > 0));
+    BOOL bExtended = DECODE_IS_D3D10_SB_OPCODE_EXTENDED(Token);
+    if( bExtended && (
+        (pInstruction->m_OpCode == D3D11_SB_OPCODE_DCL_INTERFACE)||
+        (pInstruction->m_OpCode == D3D11_SB_OPCODE_DCL_FUNCTION_TABLE)))
+    {
+        pInstruction->m_ExtendedOpCodeCount = 1;
+    #pragma prefast (suppress : __WARNING_LOCALDECLHIDESLOCAL, "This uses the same variable name for continuity.")
+        CShaderToken Token = *m_pCurrentToken++;
+        // these instructions may be longer than can fit in the normal instructionlength field
+        InstructionLength = (UINT)(Token);
+    }
+    else
+    {
+        pInstruction->m_ExtendedOpCodeCount = 0;
+        for(int i = 0; i < (bExtended ? D3D11_SB_MAX_SIMULTANEOUS_EXTENDED_OPCODES : 0); i++)
+        {   
+            pInstruction->m_ExtendedOpCodeCount++;
+    #pragma prefast (suppress : __WARNING_LOCALDECLHIDESLOCAL, "This uses the same variable name for continuity.")
+            CShaderToken Token = *m_pCurrentToken++;
+            bExtended = DECODE_IS_D3D10_SB_OPCODE_EXTENDED(Token);
+            pInstruction->m_OpCodeEx[i] = DECODE_D3D10_SB_EXTENDED_OPCODE_TYPE(Token);
+            switch(pInstruction->m_OpCodeEx[i])
+            {
+            case D3D10_SB_EXTENDED_OPCODE_SAMPLE_CONTROLS:
+                {
+                    pInstruction->m_TexelOffset[0] = (INT8)DECODE_IMMEDIATE_D3D10_SB_ADDRESS_OFFSET(D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_U, Token);
+                    pInstruction->m_TexelOffset[1] = (INT8)DECODE_IMMEDIATE_D3D10_SB_ADDRESS_OFFSET(D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_V, Token);
+                    pInstruction->m_TexelOffset[2] = (INT8)DECODE_IMMEDIATE_D3D10_SB_ADDRESS_OFFSET(D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_W, Token);
+                    for(UINT i = 0;i < 3;i++)
+                    {
+                        if(pInstruction->m_TexelOffset[i] & 0x8)
+                            pInstruction->m_TexelOffset[i] |= 0xfffffff0;
+                    }
+                    break;
+                }
+                break;
+            case D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM:
+                {
+                    pInstruction->m_ResourceDimEx = DECODE_D3D11_SB_EXTENDED_RESOURCE_DIMENSION(Token);
+                    pInstruction->m_ResourceDimStructureStrideEx = DECODE_D3D11_SB_EXTENDED_RESOURCE_DIMENSION_STRUCTURE_STRIDE(Token);
+                }
+                break;
+            case D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE:
+                {
+                    for(UINT j = 0; j < 4; j++)
+                    {
+                        pInstruction->m_ResourceReturnTypeEx[j] = DECODE_D3D11_SB_EXTENDED_RESOURCE_RETURN_TYPE(Token,j);
+                    }
+                }
+                break;
+            }
+            if( !bExtended )
+            {
+                break;
+            }
+        }
+    }
+    switch (pInstruction->m_OpCode)
+    {
+    case D3D10_SB_OPCODE_CUSTOMDATA:
+        pInstruction->m_PreciseMask = 0;
+        pInstruction->m_bSaturate = false;
+        pInstruction->m_NumOperands = 0;
+
+        // not bothering to keep custom-data for now. TODO: store
+        pInstruction->m_CustomData.Type = DECODE_D3D10_SB_CUSTOMDATA_CLASS(Token);
+        InstructionLength = *m_pCurrentToken;
+        if (*m_pCurrentToken <2)
+        {
+            InstructionLength = 2;
+            pInstruction->m_CustomData.pData = 0;
+            pInstruction->m_CustomData.DataSizeInBytes = 0;
+        }
+        else
+        {
+            pInstruction->m_CustomData.DataSizeInBytes = (*m_pCurrentToken-2)*4;
+            pInstruction->m_CustomData.pData = malloc((*m_pCurrentToken - 2)*sizeof(UINT));
+            if( NULL == pInstruction->m_CustomData.pData )
+            {
+                throw E_OUTOFMEMORY;
+            }
+            memcpy(pInstruction->m_CustomData.pData, m_pCurrentToken+1, (*m_pCurrentToken - 2)*4);
+
+            switch(pInstruction->m_CustomData.Type)
+            {
+            case D3D11_SB_CUSTOMDATA_SHADER_MESSAGE:
+                {
+                    CShaderMessage* pMessage = &pInstruction->m_CustomData.ShaderMessage;
+                    UINT Length = pInstruction->m_CustomData.DataSizeInBytes / 4;
+                    UINT* pData = (UINT*)pInstruction->m_CustomData.pData;
+
+                    ZeroMemory(pMessage, sizeof(*pMessage));
+                    
+                    if (Length < 6)
+                    {
+                        break;
+                    }
+
+                    UINT StrChars = pData[2];
+                    // Add one for the terminator and then round up.
+                    UINT StrWords = (StrChars + sizeof(DWORD)) / sizeof(DWORD);
+                    UINT NumOperands = pData[3];
+                    UINT OpLength = pData[4];
+
+                    // Enforce some basic sanity size limits.
+                    if (OpLength >= 0x10000 ||
+                        NumOperands >= 0x1000 ||
+                        StrWords >= 0x10000 ||
+                        Length < 5 + OpLength + StrWords)
+                    {
+                        break;
+                    }
+
+                    UINT* pOpEnd = &pData[5 + OpLength];
+
+                    pMessage->pOperands = (COperand*)malloc(NumOperands * sizeof(COperand));
+                    if (!pMessage->pOperands)
+                    {
+                        throw E_OUTOFMEMORY;
+                    }
+
+                    CONST CShaderToken* pOperands = (CShaderToken*)&pData[5];
+                    for (UINT i = 0; i < NumOperands; i++)
+                    {
+                        if (pOperands >= pOpEnd)
+                        {
+                            break;
+                        }
+
+                        pMessage->pOperands[i].Clear();                        
+                        pOperands = ParseOperandAt(&pMessage->pOperands[i],
+                                                   pOperands,
+                                                   pOpEnd);
+                    }
+                    if (pOperands != pOpEnd)
+                    {
+                        free(pMessage->pOperands);
+                        pMessage->pOperands = NULL;
+                        break;
+                    }
+                    
+                    // Now that we're sure everything is valid we can
+                    // fill in the message info.
+                    pMessage->MessageID = (D3D11_SB_SHADER_MESSAGE_ID)pData[0];
+                    pMessage->FormatStyle = (D3D11_SB_SHADER_MESSAGE_FORMAT)pData[1];
+                    pMessage->pFormatString = (PCSTR)pOpEnd;
+                    pMessage->NumOperands = NumOperands;
+                    break;
+                }
+            case D3D10_SB_CUSTOMDATA_COMMENT:
+                {
+                    // Guarantee that the C string comment is Null-terminated
+                    *((LPSTR)pInstruction->m_CustomData.pData + pInstruction->m_CustomData.DataSizeInBytes - 1) = '\0';
+                    break;
+                }
+            }
+        }      
+        break;
+    case D3D11_SB_OPCODE_DCL_FUNCTION_BODY:
+        pInstruction->m_FunctionBodyDecl.FunctionBodyNumber = (UINT)(*m_pCurrentToken);
+        m_pCurrentToken++;
+        break;
+    case D3D11_SB_OPCODE_DCL_FUNCTION_TABLE:
+        pInstruction->m_FunctionTableDecl.FunctionTableNumber = (UINT)(*m_pCurrentToken);
+        m_pCurrentToken++;
+        pInstruction->m_FunctionTableDecl.TableLength = (UINT)(*m_pCurrentToken);
+
+        // opcode
+        // instruction length if extended instruction
+        // table ID
+        // table length
+        // data
+        assert(InstructionLength == (3 + (bExtended?1:0) + pInstruction->m_FunctionTableDecl.TableLength));
+
+        pInstruction->m_FunctionTableDecl.pFunctionIdentifiers =
+            (UINT*) malloc(pInstruction->m_FunctionTableDecl.TableLength*sizeof(UINT));
+
+        if( NULL == pInstruction->m_FunctionTableDecl.pFunctionIdentifiers )
+        {
+            throw E_OUTOFMEMORY;
+        }
+
+        m_pCurrentToken++;
+
+        memcpy(pInstruction->m_FunctionTableDecl.pFunctionIdentifiers, m_pCurrentToken,
+               pInstruction->m_FunctionTableDecl.TableLength*sizeof(UINT));
+        break;
+    case D3D11_SB_OPCODE_DCL_INTERFACE:
+        pInstruction->m_InterfaceDecl.bDynamicallyIndexed = DECODE_D3D11_SB_INTERFACE_INDEXED_BIT(Token);
+        pInstruction->m_InterfaceDecl.InterfaceNumber = (WORD)(*m_pCurrentToken);
+        m_pCurrentToken++;
+        pInstruction->m_InterfaceDecl.ExpectedTableSize = (UINT)(*m_pCurrentToken);
+        m_pCurrentToken++;
+        // there's a limit of 64k types, so that gives a max length on this table.
+        pInstruction->m_InterfaceDecl.TableLength = DECODE_D3D11_SB_INTERFACE_TABLE_LENGTH(*m_pCurrentToken);
+        // this puts a limit on the size of interface arrays at 64k
+        pInstruction->m_InterfaceDecl.ArrayLength = DECODE_D3D11_SB_INTERFACE_ARRAY_LENGTH(*m_pCurrentToken);
+
+        // opcode
+        // instruction length if extended instruction
+        // interface ID
+        // table size
+        // num types/array length
+        // data
+        assert(InstructionLength == (4 + (bExtended?1:0) + pInstruction->m_InterfaceDecl.TableLength));
+
+        pInstruction->m_InterfaceDecl.pFunctionTableIdentifiers =
+            (UINT*) malloc(pInstruction->m_InterfaceDecl.TableLength*sizeof(UINT));
+
+        if( NULL == pInstruction->m_InterfaceDecl.pFunctionTableIdentifiers )
+        {
+            throw E_OUTOFMEMORY;
+        }
+
+        m_pCurrentToken++;
+
+        memcpy(pInstruction->m_InterfaceDecl.pFunctionTableIdentifiers, m_pCurrentToken,
+               pInstruction->m_InterfaceDecl.TableLength*sizeof(UINT));
+        break;
+    case D3D11_SB_OPCODE_INTERFACE_CALL:
+        pInstruction->m_InterfaceCall.FunctionIndex = *m_pCurrentToken++;
+        pInstruction->m_InterfaceCall.pInterfaceOperand =
+            pInstruction->m_Operands;
+        ParseOperand(pInstruction->m_InterfaceCall.pInterfaceOperand);
+        break;
+    case D3D10_SB_OPCODE_DCL_RESOURCE:
+        pInstruction->m_ResourceDecl.Dimension = DECODE_D3D10_SB_RESOURCE_DIMENSION(Token);
+        ParseOperand(&pInstruction->m_Operands[0]);
+        pInstruction->m_ResourceDecl.ReturnType[0] = DECODE_D3D10_SB_RESOURCE_RETURN_TYPE(*m_pCurrentToken, 0);
+        pInstruction->m_ResourceDecl.ReturnType[1] = DECODE_D3D10_SB_RESOURCE_RETURN_TYPE(*m_pCurrentToken, 1);
+        pInstruction->m_ResourceDecl.ReturnType[2] = DECODE_D3D10_SB_RESOURCE_RETURN_TYPE(*m_pCurrentToken, 2);
+        pInstruction->m_ResourceDecl.ReturnType[3] = DECODE_D3D10_SB_RESOURCE_RETURN_TYPE(*m_pCurrentToken, 3);
+        pInstruction->m_ResourceDecl.SampleCount = DECODE_D3D10_SB_RESOURCE_SAMPLE_COUNT(Token);
+        m_pCurrentToken++;
+        pInstruction->m_ResourceDecl.Space = 0;
+        if(b51PlusShader)
+        {
+            pInstruction->m_ResourceDecl.Space = (UINT)(*m_pCurrentToken++);
+        }
+        break;
+
+    case D3D10_SB_OPCODE_DCL_SAMPLER:
+        pInstruction->m_SamplerDecl.SamplerMode = DECODE_D3D10_SB_SAMPLER_MODE(Token);
+        ParseOperand(&pInstruction->m_Operands[0]);
+        pInstruction->m_SamplerDecl.Space = 0;
+        if(b51PlusShader)
+        {
+            pInstruction->m_SamplerDecl.Space = (UINT)(*m_pCurrentToken++);
+        }
+        break;
+
+    case D3D11_SB_OPCODE_DCL_STREAM:
+        ParseOperand(&pInstruction->m_Operands[0]);
+        break;
+
+    case D3D10_SB_OPCODE_DCL_TEMPS:
+        pInstruction->m_TempsDecl.NumTemps = (UINT)(*m_pCurrentToken);
+        m_pCurrentToken++;
+        break;
+
+    case D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP:
+        pInstruction->m_IndexableTempDecl.IndexableTempNumber = (UINT)(*m_pCurrentToken);
+        m_pCurrentToken++;
+        pInstruction->m_IndexableTempDecl.NumRegisters  = (UINT)(*m_pCurrentToken);
+        m_pCurrentToken++;
+        switch( min( 4u, max( 1u, (UINT)(*m_pCurrentToken) ) ) )
+        {
+        case 1:
+            pInstruction->m_IndexableTempDecl.Mask = D3D10_SB_OPERAND_4_COMPONENT_MASK_X;
+            break;
+        case 2:
+            pInstruction->m_IndexableTempDecl.Mask = D3D10_SB_OPERAND_4_COMPONENT_MASK_X |
+                                                     D3D10_SB_OPERAND_4_COMPONENT_MASK_Y;
+            break;
+        case 3:
+            pInstruction->m_IndexableTempDecl.Mask = D3D10_SB_OPERAND_4_COMPONENT_MASK_X |
+                                                     D3D10_SB_OPERAND_4_COMPONENT_MASK_Y |
+                                                     D3D10_SB_OPERAND_4_COMPONENT_MASK_Z;
+            break;
+        case 4:
+            pInstruction->m_IndexableTempDecl.Mask = D3D10_SB_OPERAND_4_COMPONENT_MASK_ALL;
+            break;
+        }
+        m_pCurrentToken++;
+        break;
+
+    case D3D10_SB_OPCODE_DCL_INPUT:
+    case D3D10_SB_OPCODE_DCL_OUTPUT:
+        ParseOperand(&pInstruction->m_Operands[0]);
+        break;
+
+    case D3D10_SB_OPCODE_DCL_INPUT_SIV:
+        ParseOperand(&pInstruction->m_Operands[0]);
+        pInstruction->m_InputDeclSIV.Name = DECODE_D3D10_SB_NAME(*m_pCurrentToken);
+        m_pCurrentToken++;
+        break;
+
+    case D3D10_SB_OPCODE_DCL_INPUT_SGV:
+        ParseOperand(&pInstruction->m_Operands[0]);
+        pInstruction->m_InputDeclSIV.Name = DECODE_D3D10_SB_NAME(*m_pCurrentToken);
+        m_pCurrentToken++;
+        break;
+
+    case D3D10_SB_OPCODE_DCL_INPUT_PS:
+        pInstruction->m_InputPSDecl.InterpolationMode = DECODE_D3D10_SB_INPUT_INTERPOLATION_MODE(Token);
+        ParseOperand(&pInstruction->m_Operands[0]);
+        break;
+
+    case D3D10_SB_OPCODE_DCL_INPUT_PS_SIV:
+        pInstruction->m_InputPSDeclSIV.InterpolationMode = DECODE_D3D10_SB_INPUT_INTERPOLATION_MODE(Token);
+        ParseOperand(&pInstruction->m_Operands[0]);
+        pInstruction->m_InputPSDeclSIV.Name = DECODE_D3D10_SB_NAME(*m_pCurrentToken);
+        m_pCurrentToken++;
+        break;
+
+    case D3D10_SB_OPCODE_DCL_INPUT_PS_SGV:
+        pInstruction->m_InputPSDeclSGV.InterpolationMode = DECODE_D3D10_SB_INPUT_INTERPOLATION_MODE(Token);
+        ParseOperand(&pInstruction->m_Operands[0]);
+        pInstruction->m_InputPSDeclSGV.Name = DECODE_D3D10_SB_NAME(*m_pCurrentToken);
+        m_pCurrentToken++;
+        break;
+
+    case D3D10_SB_OPCODE_DCL_OUTPUT_SIV:
+        ParseOperand(&pInstruction->m_Operands[0]);
+        pInstruction->m_OutputDeclSIV.Name = DECODE_D3D10_SB_NAME(*m_pCurrentToken);
+        m_pCurrentToken++;
+        break;
+
+    case D3D10_SB_OPCODE_DCL_OUTPUT_SGV:
+        ParseOperand(&pInstruction->m_Operands[0]);
+        pInstruction->m_OutputDeclSGV.Name = DECODE_D3D10_SB_NAME(*m_pCurrentToken);
+        m_pCurrentToken++;
+        break;
+
+    case D3D10_SB_OPCODE_DCL_INDEX_RANGE:
+        ParseOperand(&pInstruction->m_Operands[0]);
+        pInstruction->m_IndexRangeDecl.RegCount = (UINT)(*m_pCurrentToken);
+        m_pCurrentToken++;
+        break;
+
+    case D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER:
+        pInstruction->m_ConstantBufferDecl.AccessPattern = DECODE_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(Token);
+        ParseOperand(&pInstruction->m_Operands[0]);
+        pInstruction->m_ConstantBufferDecl.Space = 0;
+        if(b51PlusShader)
+        {
+            pInstruction->m_ConstantBufferDecl.Size = (UINT)(*m_pCurrentToken++);
+            pInstruction->m_ConstantBufferDecl.Space = (UINT)(*m_pCurrentToken++);
+        }
+        else
+        {
+            pInstruction->m_ConstantBufferDecl.Size = pInstruction->m_Operands[0].m_Index[1].m_RegIndex;
+        }
+        break;
+
+    case D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY:
+        pInstruction->m_OutputTopologyDecl.Topology = DECODE_D3D10_SB_GS_OUTPUT_PRIMITIVE_TOPOLOGY(Token);
+        break;
+
+    case D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE:
+        pInstruction->m_InputPrimitiveDecl.Primitive = DECODE_D3D10_SB_GS_INPUT_PRIMITIVE(Token);
+        break;
+
+    case D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
+        pInstruction->m_GSMaxOutputVertexCountDecl.MaxOutputVertexCount = (UINT)(*m_pCurrentToken);
+        m_pCurrentToken++;
+        break;
+
+    case D3D11_SB_OPCODE_DCL_GS_INSTANCE_COUNT:
+        pInstruction->m_GSInstanceCountDecl.InstanceCount = (UINT)(*m_pCurrentToken);
+        m_pCurrentToken++;
+        break;
+
+    case D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS:
+        pInstruction->m_GlobalFlagsDecl.Flags = DECODE_D3D10_SB_GLOBAL_FLAGS(Token);
+        break;
+
+    case D3D11_SB_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT:
+        pInstruction->m_InputControlPointCountDecl.InputControlPointCount = DECODE_D3D11_SB_INPUT_CONTROL_POINT_COUNT(Token);
+        break;
+
+    case D3D11_SB_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT:
+        pInstruction->m_OutputControlPointCountDecl.OutputControlPointCount = DECODE_D3D11_SB_OUTPUT_CONTROL_POINT_COUNT(Token);
+        break;
+
+    case D3D11_SB_OPCODE_DCL_TESS_DOMAIN:
+        pInstruction->m_TessellatorDomainDecl.TessellatorDomain = DECODE_D3D11_SB_TESS_DOMAIN(Token);
+        break;
+
+    case D3D11_SB_OPCODE_DCL_TESS_PARTITIONING:
+        pInstruction->m_TessellatorPartitioningDecl.TessellatorPartitioning = DECODE_D3D11_SB_TESS_PARTITIONING(Token);
+        break;
+
+    case D3D11_SB_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE:
+        pInstruction->m_TessellatorOutputPrimitiveDecl.TessellatorOutputPrimitive = DECODE_D3D11_SB_TESS_OUTPUT_PRIMITIVE(Token);
+        break;
+
+    case D3D11_SB_OPCODE_DCL_HS_MAX_TESSFACTOR:
+        pInstruction->m_HSMaxTessFactorDecl.MaxTessFactor = *(float*)m_pCurrentToken;
+        m_pCurrentToken++;
+        break;
+
+    case D3D11_SB_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
+        pInstruction->m_HSForkPhaseInstanceCountDecl.InstanceCount = *(UINT*)m_pCurrentToken;
+        m_pCurrentToken++;
+        break;
+    case D3D11_SB_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT:
+        pInstruction->m_HSJoinPhaseInstanceCountDecl.InstanceCount = *(UINT*)m_pCurrentToken;
+        m_pCurrentToken++;
+        break;
+    case D3D11_SB_OPCODE_DCL_THREAD_GROUP:
+        pInstruction->m_ThreadGroupDecl.x = *(UINT*)m_pCurrentToken++;
+        pInstruction->m_ThreadGroupDecl.y = *(UINT*)m_pCurrentToken++;
+        pInstruction->m_ThreadGroupDecl.z = *(UINT*)m_pCurrentToken++;
+        break;
+    case D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED:
+        pInstruction->m_TypedUAVDecl.Dimension = DECODE_D3D10_SB_RESOURCE_DIMENSION(Token);
+        pInstruction->m_TypedUAVDecl.Flags = DECODE_D3D11_SB_RESOURCE_FLAGS(Token);
+        ParseOperand(&pInstruction->m_Operands[0]);
+        pInstruction->m_TypedUAVDecl.ReturnType[0] = DECODE_D3D10_SB_RESOURCE_RETURN_TYPE(*m_pCurrentToken, 0);
+        pInstruction->m_TypedUAVDecl.ReturnType[1] = DECODE_D3D10_SB_RESOURCE_RETURN_TYPE(*m_pCurrentToken, 1);
+        pInstruction->m_TypedUAVDecl.ReturnType[2] = DECODE_D3D10_SB_RESOURCE_RETURN_TYPE(*m_pCurrentToken, 2);
+        pInstruction->m_TypedUAVDecl.ReturnType[3] = DECODE_D3D10_SB_RESOURCE_RETURN_TYPE(*m_pCurrentToken, 3);
+        m_pCurrentToken++;
+        pInstruction->m_TypedUAVDecl.Space = 0;
+        if(b51PlusShader)
+        {
+            pInstruction->m_TypedUAVDecl.Space = (UINT)(*m_pCurrentToken++);
+        }
+        break;
+    case D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW:
+        pInstruction->m_RawUAVDecl.Flags = DECODE_D3D11_SB_RESOURCE_FLAGS(Token);
+        ParseOperand(&pInstruction->m_Operands[0]);       
+        pInstruction->m_RawUAVDecl.Space = 0;
+        if(b51PlusShader)
+        {
+            pInstruction->m_RawUAVDecl.Space = (UINT)(*m_pCurrentToken++);
+        }
+        break;
+    case D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED:
+        pInstruction->m_StructuredUAVDecl.Flags = DECODE_D3D11_SB_RESOURCE_FLAGS(Token);
+        ParseOperand(&pInstruction->m_Operands[0]);
+        pInstruction->m_StructuredUAVDecl.ByteStride = *(UINT*)m_pCurrentToken++;
+        pInstruction->m_StructuredUAVDecl.Space = 0;
+        if(b51PlusShader)
+        {
+            pInstruction->m_StructuredUAVDecl.Space = (UINT)(*m_pCurrentToken++);
+        }
+        break;
+    case D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW:
+        ParseOperand(&pInstruction->m_Operands[0]);
+        pInstruction->m_RawTGSMDecl.ByteCount = *(UINT*)m_pCurrentToken++;
+        break;
+    case D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED:
+        ParseOperand(&pInstruction->m_Operands[0]);
+        pInstruction->m_StructuredTGSMDecl.StructByteStride = *(UINT*)m_pCurrentToken++;
+        pInstruction->m_StructuredTGSMDecl.StructCount = *(UINT*)m_pCurrentToken++;
+        break;
+    case D3D11_SB_OPCODE_DCL_RESOURCE_RAW:
+        ParseOperand(&pInstruction->m_Operands[0]);       
+        pInstruction->m_RawSRVDecl.Space = 0;
+        if(b51PlusShader)
+        {
+            pInstruction->m_RawSRVDecl.Space = (UINT)(*m_pCurrentToken++);
+        }
+        break;
+    case D3D11_SB_OPCODE_DCL_RESOURCE_STRUCTURED:
+        ParseOperand(&pInstruction->m_Operands[0]);       
+        pInstruction->m_StructuredSRVDecl.ByteStride = *(UINT*)m_pCurrentToken++;
+        pInstruction->m_StructuredSRVDecl.Space = 0;
+        if(b51PlusShader)
+        {
+            pInstruction->m_StructuredSRVDecl.Space = (UINT)(*m_pCurrentToken++);
+        }
+        break;
+    case D3D11_SB_OPCODE_SYNC:
+        {
+            DWORD flags = DECODE_D3D11_SB_SYNC_FLAGS(Token);
+            pInstruction->m_SyncFlags.bThreadsInGroup = (flags & D3D11_SB_SYNC_THREADS_IN_GROUP) ? true : false;
+            pInstruction->m_SyncFlags.bThreadGroupSharedMemory = (flags & D3D11_SB_SYNC_THREAD_GROUP_SHARED_MEMORY) ? true : false;
+            pInstruction->m_SyncFlags.bUnorderedAccessViewMemoryGroup = (flags & D3D11_SB_SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GROUP ) ? true : false;
+            pInstruction->m_SyncFlags.bUnorderedAccessViewMemoryGlobal = (flags & D3D11_SB_SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL ) ? true : false;
+        }
+        break;
+    case D3D10_SB_OPCODE_RESINFO:
+        pInstruction->m_ResInfoReturnType = DECODE_D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE(Token);
+        ParseOperand(&pInstruction->m_Operands[0]);
+        ParseOperand(&pInstruction->m_Operands[1]);
+        ParseOperand(&pInstruction->m_Operands[2]);
+        break;
+
+    case D3D10_1_SB_OPCODE_SAMPLE_INFO:
+        pInstruction->m_InstructionReturnType = DECODE_D3D10_SB_INSTRUCTION_RETURN_TYPE(Token);
+        ParseOperand(&pInstruction->m_Operands[0]);
+        ParseOperand(&pInstruction->m_Operands[1]);
+        break;
+
+    case D3D10_SB_OPCODE_IF:
+    case D3D10_SB_OPCODE_BREAKC:
+    case D3D10_SB_OPCODE_CONTINUEC:
+    case D3D10_SB_OPCODE_RETC:
+    case D3D10_SB_OPCODE_DISCARD:
+        pInstruction->SetTest(DECODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(Token));
+        ParseOperand(&pInstruction->m_Operands[0]);
+        break;
+    case D3D10_SB_OPCODE_CALLC:
+        pInstruction->SetTest(DECODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(Token));
+        ParseOperand(&pInstruction->m_Operands[0]);
+        ParseOperand(&pInstruction->m_Operands[1]);
+        break;
+    default:
+        {
+          
+            for (UINT i=0; i < pInstruction->m_NumOperands; i++)
+            {
+                ParseOperand(&pInstruction->m_Operands[i]);
+            }
+            break;
+        }
+    }
+    m_pCurrentToken = pStart + InstructionLength;
+}
+
+// ****************************************************************************
+//
+// class CShaderAsm
+//
+// ****************************************************************************
+
+// Emits one operand into the token stream, in this order:
+//   1. the operand token (type, component count, extended flag and, for
+//      non-immediate operands, index dimension / component selection /
+//      index representations),
+//   2. the extended operand token, when operand.m_bExtendedOperand is set,
+//   3. the immediate values, for IMMEDIATE32 / IMMEDIATE64 operands,
+//   4. the index data for every index dimension; relative indices are
+//      written by recursively emitting the relative register as an operand.
+// Throws E_FAIL for an unknown component selection mode, an unsupported
+// immediate component count, or an unknown index representation.
+void CShaderAsm::EmitOperand(const COperandBase& operand)
+{
+    const bool bImmediate = (operand.m_Type == D3D10_SB_OPERAND_TYPE_IMMEDIATE32 ||
+                             operand.m_Type == D3D10_SB_OPERAND_TYPE_IMMEDIATE64);
+
+    CShaderToken Token = ENCODE_D3D10_SB_OPERAND_TYPE(operand.m_Type) |
+                            ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(operand.m_NumComponents) |
+                            ENCODE_D3D10_SB_OPERAND_EXTENDED(operand.m_bExtendedOperand);
+
+    BOOL bProcessOperandIndices = FALSE;
+    if (!bImmediate)
+    {
+        Token |= ENCODE_D3D10_SB_OPERAND_INDEX_DIMENSION(operand.m_IndexDimension);
+        if (operand.m_NumComponents == D3D10_SB_OPERAND_4_COMPONENT)
+        {
+            // Component selection mode
+            Token |= ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(operand.m_ComponentSelection);
+            switch(operand.m_ComponentSelection)
+            {
+            case D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE:
+                Token |= ENCODE_D3D10_SB_OPERAND_4_COMPONENT_MASK(operand.m_WriteMask );
+                break;
+            case D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE:
+                Token |= ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE(operand.m_Swizzle[0],
+                                                                operand.m_Swizzle[1],
+                                                                operand.m_Swizzle[2],
+                                                                operand.m_Swizzle[3]);
+                break;
+            case D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE:
+                Token |= ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECT_1(operand.m_ComponentName);
+                break;
+            default:
+                throw E_FAIL;
+            }
+        }
+
+        const UINT NumDimensions = operand.m_IndexDimension;
+        if (NumDimensions > 0)
+        {
+            bProcessOperandIndices = TRUE;
+            // Encode index representation
+            for (UINT i=0; i < NumDimensions; i++)
+            {
+                Token |= ENCODE_D3D10_SB_OPERAND_INDEX_REPRESENTATION(i, operand.m_IndexType[i]);
+            }
+        }
+    }
+
+    // The operand token is always written first. It must precede the extended
+    // operand token for immediates as well; emitting it after the extended
+    // block (while reusing one variable for both tokens) would write the
+    // extended token twice and drop the operand token.
+    FUNC(Token);
+
+    // Extended operand
+    if (operand.m_bExtendedOperand)
+    {
+        CShaderToken ExtToken = ENCODE_D3D10_SB_EXTENDED_OPERAND_TYPE(operand.m_ExtendedOperandType);
+        if (operand.m_ExtendedOperandType == D3D10_SB_EXTENDED_OPERAND_MODIFIER)
+        {
+            ExtToken |= ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(operand.m_Modifier);
+            ExtToken |= ENCODE_D3D11_SB_OPERAND_MIN_PRECISION(operand.m_MinPrecision);
+            ExtToken |= ENCODE_D3D12_SB_OPERAND_NON_UNIFORM(operand.m_Nonuniform);
+        }
+        FUNC(ExtToken);
+    }
+
+    // Immediate values
+    if (bImmediate)
+    {
+        UINT n = 0;
+        if (operand.m_NumComponents == D3D10_SB_OPERAND_4_COMPONENT)
+            n = 4;
+        else
+        if (operand.m_NumComponents == D3D10_SB_OPERAND_1_COMPONENT)
+            n = 1;
+        else
+        {
+            throw E_FAIL;
+        }
+        // NOTE(review): n entries of m_Value are written for IMMEDIATE64 as
+        // well as IMMEDIATE32 - confirm this matches the 64-bit layout.
+        for (UINT i=0 ; i < n; i++)
+        {
+            FUNC(operand.m_Value[i]);
+        }
+    }
+
+    // Operand indices
+    if (bProcessOperandIndices)
+    {
+        const UINT NumDimensions = operand.m_IndexDimension;
+        // Emit the data for each index dimension
+        for (UINT i=0; i < NumDimensions; i++)
+        {
+            switch (operand.m_IndexType[i])
+            {
+            case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
+                FUNC(operand.m_Index[i].m_RegIndex);
+                break;
+            case D3D10_SB_OPERAND_INDEX_IMMEDIATE64:
+                FUNC(operand.m_Index[i].m_RegIndexA[0]);
+                FUNC(operand.m_Index[i].m_RegIndexA[1]);
+                break;
+            case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:
+                FUNC(operand.m_Index[i].m_RegIndex);
+                // Fall through: the relative register follows the immediate
+            case D3D10_SB_OPERAND_INDEX_RELATIVE:
+                {
+                    D3D10_SB_OPERAND_TYPE RelRegType = operand.m_Index[i].m_RelRegType;
+                    if( operand.m_Index[i].m_IndexDimension == D3D10_SB_OPERAND_INDEX_2D )
+                    {
+                        EmitOperand(COperand2D(RelRegType,
+                                               operand.m_Index[i].m_RelIndex,
+                                               operand.m_Index[i].m_RelIndex1,
+                                               operand.m_Index[i].m_ComponentName,
+                                               operand.m_Index[i].m_MinPrecision));
+                    }
+                    else
+                    {
+                        EmitOperand(COperand4(RelRegType,
+                                              operand.m_Index[i].m_RelIndex,
+                                              operand.m_Index[i].m_ComponentName,
+                                              operand.m_Index[i].m_MinPrecision));
+                    }
+                }
+                break;
+            default:
+                throw E_FAIL;
+            }
+        }
+    }
+}
+//-----------------------------------------------------------------------------
+// Emits a complete instruction: the opcode token, any extended opcode tokens,
+// then every operand through EmitOperand().
+// D3D10_SB_OPCODE_CUSTOMDATA is handled separately: the opcode is followed by
+// a length value of DataSizeInBytes/4 + 2 and the payload copied one UINT at
+// a time; no operands are written for it.
+void CShaderAsm::EmitInstruction(const CInstruction& instruction)
+{
+    UINT  OpCode;
+
+    if(instruction.m_OpCode == D3D10_SB_OPCODE_CUSTOMDATA)
+    {
+        const UINT NumPayloadDwords = instruction.m_CustomData.DataSizeInBytes/4;
+        OPCODE(D3D10_SB_OPCODE_CUSTOMDATA);
+        // NOTE(review): the "+ 2" presumably accounts for the opcode and
+        // length tokens themselves - confirm against the token format.
+        FUNC(NumPayloadDwords + 2);
+        for(UINT i = 0;i < NumPayloadDwords; i++)
+            FUNC(((UINT*)instruction.m_CustomData.pData)[i]);
+
+        ENDINSTRUCTION();
+        return;
+    }
+
+    OpCode = ENCODE_D3D10_SB_OPCODE_TYPE(instruction.m_OpCode) | ENCODE_D3D10_SB_OPCODE_EXTENDED(instruction.m_ExtendedOpCodeCount > 0 ? true : false);
+
+    // Opcode-specific bits that live in the opcode token itself
+    switch (instruction.m_OpCode)
+    {
+    case D3D10_SB_OPCODE_IF:
+    case D3D10_SB_OPCODE_BREAKC:
+    case D3D10_SB_OPCODE_CALLC:
+    case D3D10_SB_OPCODE_CONTINUEC:
+    case D3D10_SB_OPCODE_RETC:
+    case D3D10_SB_OPCODE_DISCARD:
+        OpCode |= ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(instruction.Test());
+        break;
+    case D3D10_SB_OPCODE_RESINFO:
+        OpCode |= ENCODE_D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE(instruction.m_ResInfoReturnType);
+        break;
+    case D3D10_1_SB_OPCODE_SAMPLE_INFO:
+        OpCode |= ENCODE_D3D10_SB_INSTRUCTION_RETURN_TYPE(instruction.m_InstructionReturnType);
+        break;
+    case D3D11_SB_OPCODE_SYNC:
+        OpCode |= ENCODE_D3D11_SB_SYNC_FLAGS(
+                  ( instruction.m_SyncFlags.bThreadsInGroup ? D3D11_SB_SYNC_THREADS_IN_GROUP : 0 ) |
+                  ( instruction.m_SyncFlags.bThreadGroupSharedMemory ? D3D11_SB_SYNC_THREAD_GROUP_SHARED_MEMORY : 0 ) |
+                  ( instruction.m_SyncFlags.bUnorderedAccessViewMemoryGlobal ? D3D11_SB_SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL : 0 ) |
+                  ( instruction.m_SyncFlags.bUnorderedAccessViewMemoryGroup ? D3D11_SB_SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GROUP : 0 ) );
+        break;
+    }
+    OpCode |= ENCODE_D3D10_SB_INSTRUCTION_SATURATE(instruction.m_bSaturate);
+    OpCode |= ENCODE_D3D11_SB_INSTRUCTION_PRECISE_VALUES(instruction.m_PreciseMask);
+    OPCODE(OpCode);
+
+    // Number of extended opcode tokens actually written. The same clamped
+    // value drives the "another extended token follows" bit, so the last
+    // token written never announces a successor that is not emitted.
+    const UINT NumExtended = min(instruction.m_ExtendedOpCodeCount,(UINT)D3D11_SB_MAX_SIMULTANEOUS_EXTENDED_OPCODES);
+    for(UINT i = 0; i < NumExtended; i++)
+    {
+        UINT  Extended = ENCODE_D3D10_SB_EXTENDED_OPCODE_TYPE(instruction.m_OpCodeEx[i]);
+        switch( instruction.m_OpCodeEx[i] )
+        {
+        case D3D10_SB_EXTENDED_OPCODE_SAMPLE_CONTROLS:
+            {
+                Extended |= ENCODE_IMMEDIATE_D3D10_SB_ADDRESS_OFFSET(D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_U, instruction.m_TexelOffset[0]);
+                Extended |= ENCODE_IMMEDIATE_D3D10_SB_ADDRESS_OFFSET(D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_V, instruction.m_TexelOffset[1]);
+                Extended |= ENCODE_IMMEDIATE_D3D10_SB_ADDRESS_OFFSET(D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_W, instruction.m_TexelOffset[2]);
+            }
+            break;
+        case D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM:
+            {
+                Extended |= ENCODE_D3D11_SB_EXTENDED_RESOURCE_DIMENSION(instruction.m_ResourceDimEx) |
+                    ENCODE_D3D11_SB_EXTENDED_RESOURCE_DIMENSION_STRUCTURE_STRIDE(instruction.m_ResourceDimStructureStrideEx);
+            }
+            break;
+        case D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE:
+            {
+                for(UINT j = 0; j < 4; j++)
+                {
+                    Extended |= ENCODE_D3D11_SB_EXTENDED_RESOURCE_RETURN_TYPE(instruction.m_ResourceReturnTypeEx[j],j);
+                }
+            }
+            break;
+        }
+        Extended |= ENCODE_D3D10_SB_OPCODE_EXTENDED((i + 1 < NumExtended) ? true : false);
+        FUNC(Extended);
+    }
+    for (UINT i=0; i < instruction.m_NumOperands; i++)
+    {
+        EmitOperand(instruction.m_Operands[i]);
+    }
+    ENDINSTRUCTION();
+}
+
+//*****************************************************************************
+//
+//  CInstruction
+//
+//*****************************************************************************
+// Copies the name registered for this instruction's opcode in
+// g_InstructionInfo into pString (at most StringSize characters). Operands are
+// not written. The result of the copy is not inspected; TRUE is always
+// returned.
+BOOL CInstruction::Disassemble( __out_ecount(StringSize) LPSTR pString, UINT StringSize)
+{
+    LPCSTR pszOpcodeName = g_InstructionInfo[m_OpCode].m_Name;
+
+    StringCchCopyA(pString, StringSize, pszOpcodeName);
+
+    return TRUE;
+}
+
+
+}; // name space D3D10ShaderBinary
+
+// End of file : ShaderBinary.cpp

+ 24 - 0
projects/dxilconv/lib/ShaderBinary/ShaderBinaryIncludes.h

@@ -0,0 +1,24 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// ShaderBinaryIncludes.h                                                    //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+#include "windows.h"
+
+#include <assert.h>
+#include <float.h>
+#include <strsafe.h>
+#include <intsafe.h>
+#include <dxgiformat.h>
+#include <d3d12.h>
+#define D3DX12_NO_STATE_OBJECT_HELPERS
+#include "dxc/Support/d3dx12.h"
+#include "D3D12TokenizedProgramFormat.hpp"
+#include "ShaderBinary/ShaderBinary.h"
+
+// ASSUME( _exp ): checks _exp with assert(), then hands the same expression to
+// __analysis_assume() and __assume(). The expansion is a brace-enclosed block
+// and _exp appears in it three times.
+#define ASSUME( _exp ) { assert( _exp ); __analysis_assume( _exp ); __assume( _exp ); }

+ 13 - 0
projects/dxilconv/test/dxbc2dxil-asm/assemble_dxbc.bat

@@ -0,0 +1,13 @@
+@echo off
+
+rem Assembles the .asm test shaders in this directory into .dxbc files.
+rem The assembler path is built from the _NTTREE environment variable.
+set TESTASM=%_NTTREE%\nttest\Windowstest\graphics\d3d\support\testasm.exe
+
+rem Shaders assembled with default options. The output file is the input
+rem base name with a .dxbc extension.
+FOR %%f IN (call2.asm cs3.asm  cyclecounter.asm hs3.asm indexabletemp4.asm) DO (
+    %TESTASM% %%f /Fo %%~nf.dxbc
+)
+
+rem Shaders assembled with the /allowMinimumPrecision switch.
+FOR %%f IN (indexabletemp6.asm) DO (
+    %TESTASM% %%f /allowMinimumPrecision /Fo %%~nf.dxbc
+)
+
+

+ 35 - 0
projects/dxilconv/test/dxbc2dxil-asm/call2.asm

@@ -0,0 +1,35 @@
+/*// RUN: %testasm %s /Fo %t.dxbc*/
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+// Pixel shader (ps_5_0) exercising subroutine calls: an unconditional call
+// and a conditional callc_nz at the top level, followed by calls placed
+// inside the case and default bodies of a switch.
+ps_5_0
+dcl_globalFlags refactoringAllowed
+dcl_input_ps linear v0.x
+dcl_input_ps constant v1.xyz
+dcl_output o0.x
+dcl_temps 1
+mov r0.xyz, v1.xyz
+call l0
+callc_nz r0.x, l0
+// The default body appears between case 1 and case 2.
+switch v1.x
+  case 1
+  call l2
+  callc_nz r0.y, l1
+  break
+  default
+  callc_nz r0.z, l2
+  break
+  case 2
+  break
+endswitch
+add o0.x, r0.x, l(1.000000)
+ret
+// Subroutines: each one writes r0.x and returns.
+label l0
+mov r0.x, l(5.000000)
+ret
+label l1
+mov r0.x, v0.x
+ret
+label l2
+mov r0.x, l(3.000000)
+ret

BIN
projects/dxilconv/test/dxbc2dxil-asm/call2.dxbc


+ 129 - 0
projects/dxilconv/test/dxbc2dxil-asm/call2.ref

@@ -0,0 +1,129 @@
+
+@dx.v32.r0 = internal global i32 undef, align 4
+
+define internal void @dx.label.0() {
+entry:
+  store i32 1084227584, i32* @dx.v32.r0
+  ret void
+}
+
+define internal void @dx.label.1() {
+entry:
+  %0 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %1 = call i32 @dx.op.bitcastF32toI32(i32 127, float %0)
+  store i32 %1, i32* @dx.v32.r0
+  ret void
+}
+
+define internal void @dx.label.2() {
+entry:
+  store i32 1077936128, i32* @dx.v32.r0
+  ret void
+}
+
+define void @main() {
+entry:
+  %0 = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 0, i32 undef)
+  %1 = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 1, i32 undef)
+  %2 = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 2, i32 undef)
+  store i32 %0, i32* @dx.v32.r0
+  call void @dx.label.0()
+  %3 = load i32, i32* @dx.v32.r0
+  %4 = icmp ne i32 %3, 0
+  br i1 %4, label %label0.callc, label %label0.callc.1
+
+label0.callc:                                     ; preds = %entry
+  call void @dx.label.0()
+  br label %label0.callc.1
+
+label0.callc.1:                                   ; preds = %label0.callc, %entry
+  %5 = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 0, i32 undef)
+  switch i32 %5, label %switch0.default [
+    i32 1, label %switch0.casegroup0
+    i32 2, label %switch0.casegroup1
+  ]
+
+switch0.casegroup0:                               ; preds = %label0.callc.1
+  call void @dx.label.2()
+  %6 = icmp ne i32 %1, 0
+  br i1 %6, label %label1.callc, label %label1.callc.2
+
+label1.callc:                                     ; preds = %switch0.casegroup0
+  call void @dx.label.1()
+  br label %label1.callc.2
+
+label1.callc.2:                                   ; preds = %label1.callc, %switch0.casegroup0
+  br label %switch0.end
+
+switch0.default:                                  ; preds = %label0.callc.1
+  %7 = icmp ne i32 %2, 0
+  br i1 %7, label %label2.callc, label %label2.callc.3
+
+label2.callc:                                     ; preds = %switch0.default
+  call void @dx.label.2()
+  br label %label2.callc.3
+
+label2.callc.3:                                   ; preds = %label2.callc, %switch0.default
+  br label %switch0.end
+
+switch0.casegroup1:                               ; preds = %label0.callc.1
+  br label %switch0.end
+
+switch0.end:                                      ; preds = %switch0.casegroup1, %label2.callc.3, %label1.callc.2
+  %8 = load i32, i32* @dx.v32.r0
+  %9 = call float @dx.op.bitcastI32toF32(i32 126, i32 %8)
+  %10 = fadd fast float %9, 1.000000e+00
+  %11 = call i32 @dx.op.bitcastF32toI32(i32 127, float %10)
+  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 0, i32 %11)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.tempRegStore.i32(i32, i32, i32) #1
+
+; Function Attrs: nounwind readonly
+declare i32 @dx.op.tempRegLoad.i32(i32, i32) #2
+
+; Function Attrs: nounwind readonly
+declare float @dx.op.tempRegLoad.f32(i32, i32) #2
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.bitcastF32toI32(i32, float) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.i32(i32, i32, i32, i8, i32) #1
+
+; Function Attrs: nounwind readnone
+declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.tempRegStore.f32(i32, i32, float) #1
+
+; Function Attrs: nounwind readnone
+declare float @dx.op.bitcastI32toF32(i32, i32) #0
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind readonly }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.entryPoints = !{!2}
+!llvm.ident = !{!11}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"ps", i32 6, i32 0}
+!2 = !{void ()* @main, !"main", !3, null, !10}
+!3 = !{!4, !8, null}
+!4 = !{!5, !7}
+!5 = !{i32 0, !"0_", i8 9, i8 0, !6, i8 2, i32 1, i8 1, i32 0, i8 0, null}
+!6 = !{i32 0}
+!7 = !{i32 1, !"1_", i8 5, i8 0, !6, i8 1, i32 1, i8 3, i32 1, i8 0, null}
+!8 = !{!9}
+!9 = !{i32 0, !"SV_Target", i8 5, i8 16, !6, i8 0, i32 1, i8 1, i32 0, i8 0, null}
+!10 = !{i32 0, i64 256}
+!11 = !{!"dxbc2dxil 1.2"}

+ 35 - 0
projects/dxilconv/test/dxbc2dxil-asm/cs3.asm

@@ -0,0 +1,35 @@
+/*// RUN: %testasm %s /Fo %t.dxbc*/
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+// Compute shader (cs_5_0) exercising raw thread-group shared memory (g0):
+// store_raw / ld_raw, a series of sync variants, and atomic operations.
+cs_5_0
+dcl_globalFlags refactoringAllowed
+dcl_constantbuffer cb0[1], immediateIndexed
+dcl_input vThreadIDInGroup.xyz
+dcl_temps 3
+dcl_tgsm_raw g0, 1024
+dcl_thread_group 4, 2, 3
+
+// Address in r0.x is vThreadIDInGroup.z shifted left by 2.
+ishl r0.x,  vThreadIDInGroup.z, l(2)
+store_raw g0.xy, r0.x, cb0[0].wzyx
+
+// sync with combinations of the _g, _ugroup, _uglobal and _t suffixes.
+sync_g
+sync_ugroup
+sync_uglobal
+sync_g_t
+sync_ugroup_t
+sync_uglobal_t
+sync_ugroup_g
+sync_uglobal_g
+sync_ugroup_g_t
+sync_uglobal_g_t
+
+ld_raw r0.xz, r0.x, g0.zxwy
+
+// Atomics on g0. r1 is used as the address operand although no earlier
+// instruction in this shader writes r1.
+imm_atomic_iadd r2.x, g0, r1.xyxx, vThreadIDInGroup.x
+atomic_or g0, r1.xyxx, vThreadIDInGroup.x
+
+atomic_cmp_store g0, r1.xyxx, vThreadIDInGroup.y, vThreadIDInGroup.x
+imm_atomic_cmp_exch r1.x, g0, r1.xyxx, vThreadIDInGroup.y, vThreadIDInGroup.x
+
+ret

BIN
projects/dxilconv/test/dxbc2dxil-asm/cs3.dxbc


+ 102 - 0
projects/dxilconv/test/dxbc2dxil-asm/cs3.ref

@@ -0,0 +1,102 @@
+
+%dx.types.Handle = type { i8* }
+%dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
+%dx.types.i8x16 = type { [16 x i8] }
+
+@TGSM0 = internal addrspace(3) global [1024 x i8] undef, align 4
+@llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([1024 x i8], [1024 x i8] addrspace(3)* @TGSM0, i32 0, i32 0) to i8*)], section "llvm.metadata"
+
+define void @main() {
+entry:
+  %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 0, i1 false)
+  %1 = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 2)
+  %2 = shl i32 %1, 2
+  %3 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %0, i32 0)
+  %4 = extractvalue %dx.types.CBufRet.i32 %3, 3
+  %5 = extractvalue %dx.types.CBufRet.i32 %3, 2
+  %6 = getelementptr [1024 x i8], [1024 x i8] addrspace(3)* @TGSM0, i32 0, i32 %2
+  %7 = bitcast i8 addrspace(3)* %6 to i32 addrspace(3)*
+  store i32 %4, i32 addrspace(3)* %7, align 4
+  %8 = add i32 %2, 4
+  %9 = getelementptr [1024 x i8], [1024 x i8] addrspace(3)* @TGSM0, i32 0, i32 %8
+  %10 = bitcast i8 addrspace(3)* %9 to i32 addrspace(3)*
+  store i32 %5, i32 addrspace(3)* %10, align 4
+  call void @dx.op.barrier(i32 80, i32 8)
+  call void @dx.op.barrier(i32 80, i32 4)
+  call void @dx.op.barrier(i32 80, i32 2)
+  call void @dx.op.barrier(i32 80, i32 9)
+  call void @dx.op.barrier(i32 80, i32 5)
+  call void @dx.op.barrier(i32 80, i32 3)
+  call void @dx.op.barrier(i32 80, i32 12)
+  call void @dx.op.barrier(i32 80, i32 10)
+  call void @dx.op.barrier(i32 80, i32 13)
+  call void @dx.op.barrier(i32 80, i32 11)
+  %11 = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 0)
+  %12 = getelementptr [1024 x i8], [1024 x i8] addrspace(3)* @TGSM0, i32 0, i32 undef
+  %13 = bitcast i8 addrspace(3)* %12 to i32 addrspace(3)*
+  %14 = atomicrmw add i32 addrspace(3)* %13, i32 %11 monotonic
+  %15 = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 0)
+  %16 = getelementptr [1024 x i8], [1024 x i8] addrspace(3)* @TGSM0, i32 0, i32 undef
+  %17 = bitcast i8 addrspace(3)* %16 to i32 addrspace(3)*
+  %18 = atomicrmw or i32 addrspace(3)* %17, i32 %15 monotonic
+  %19 = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 1)
+  %20 = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 0)
+  %21 = getelementptr [1024 x i8], [1024 x i8] addrspace(3)* @TGSM0, i32 0, i32 undef
+  %22 = bitcast i8 addrspace(3)* %21 to i32 addrspace(3)*
+  %23 = cmpxchg i32 addrspace(3)* %22, i32 %19, i32 %20 monotonic monotonic
+  %24 = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 1)
+  %25 = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 0)
+  %26 = getelementptr [1024 x i8], [1024 x i8] addrspace(3)* @TGSM0, i32 0, i32 undef
+  %27 = bitcast i8 addrspace(3)* %26 to i32 addrspace(3)*
+  %28 = cmpxchg i32 addrspace(3)* %27, i32 %24, i32 %25 monotonic monotonic
+  ret void
+}
+
+; Function Attrs: nounwind readonly
+declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #0
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.threadIdInGroup.i32(i32, i32) #1
+
+; Function Attrs: nounwind
+declare void @dx.op.tempRegStore.i32(i32, i32, i32) #2
+
+; Function Attrs: nounwind readonly
+declare i32 @dx.op.tempRegLoad.i32(i32, i32) #0
+
+; Function Attrs: nounwind readonly
+declare %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32, %dx.types.Handle, i32) #0
+
+; Function Attrs: noduplicate nounwind
+declare void @dx.op.barrier(i32, i32) #3
+
+; Function Attrs: nounwind readnone
+declare float @dx.op.bitcastI32toF32(i32, i32) #1
+
+; Function Attrs: nounwind
+declare void @dx.op.tempRegStore.f32(i32, i32, float) #2
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.bitcastF32toI32(i32, float) #1
+
+attributes #0 = { nounwind readonly }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
+attributes #3 = { noduplicate nounwind }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.resources = !{!2}
+!dx.entryPoints = !{!5}
+!llvm.ident = !{!8}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"cs", i32 6, i32 0}
+!2 = !{null, null, !3, null}
+!3 = !{!4}
+!4 = !{i32 0, %dx.types.i8x16 addrspace(2)* undef, !"CB0", i32 0, i32 0, i32 1, i32 16, null}
+!5 = !{void ()* @main, !"main", null, !2, !6}
+!6 = !{i32 0, i64 256, i32 4, !7}
+!7 = !{i32 4, i32 2, i32 3}
+!8 = !{!"dxbc2dxil 1.2"}

+ 11 - 0
projects/dxilconv/test/dxbc2dxil-asm/cyclecounter.asm

@@ -0,0 +1,11 @@
+/*// RUN: %testasm %s /Fo %t.dxbc*/
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+// Pixel shader (ps_5_0) that reads vCycleCounter.x: r0 is zeroed, its z
+// component is replaced by the counter value, and r0 is written to o0.
+ps_5_0
+dcl_temps 1
+dcl_output o0.xyzw
+dcl_input vCycleCounter.x
+mov r0, l(0,0,0,0)
+mov r0.z, vCycleCounter.x
+mov o0, r0

BIN
projects/dxilconv/test/dxbc2dxil-asm/cyclecounter.dxbc


+ 44 - 0
projects/dxilconv/test/dxbc2dxil-asm/cyclecounter.ref

@@ -0,0 +1,44 @@
+
+%dx.types.twoi32 = type { i32, i32 }
+
+define void @main() {
+entry:
+  %0 = call %dx.types.twoi32 @dx.op.cycleCounterLegacy(i32 109)
+  %1 = extractvalue %dx.types.twoi32 %0, 0
+  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 0, i32 0)
+  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 1, i32 0)
+  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 2, i32 %1)
+  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 3, i32 0)
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @dx.op.tempRegStore.i32(i32, i32, i32) #0
+
+; Function Attrs: nounwind
+declare %dx.types.twoi32 @dx.op.cycleCounterLegacy(i32) #0
+
+; Function Attrs: nounwind readonly
+declare i32 @dx.op.tempRegLoad.i32(i32, i32) #1
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.i32(i32, i32, i32, i8, i32) #0
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.entryPoints = !{!2}
+!llvm.ident = !{!8}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"ps", i32 6, i32 0}
+!2 = !{void ()* @main, !"main", !3, null, !7}
+!3 = !{null, !4, null}
+!4 = !{!5}
+!5 = !{i32 0, !"SV_Target", i8 5, i8 16, !6, i8 0, i32 1, i8 4, i32 0, i8 0, null}
+!6 = !{i32 0}
+!7 = !{i32 0, i64 258}
+!8 = !{!"dxbc2dxil 1.2"}

+ 93 - 0
projects/dxilconv/test/dxbc2dxil-asm/hs3.asm

@@ -0,0 +1,93 @@
+/*// RUN: %testasm %s /Fo %t.dxbc*/
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+// Hull shader (hs_5_0) with a declarations section, one control point phase,
+// two fork phases and one join phase.
+hs_5_0
+hs_decls
+dcl_input_control_point_count   4
+dcl_output_control_point_count  32
+dcl_tessellator_domain             domain_quad
+dcl_tessellator_partitioning       partitioning_fractional_odd
+dcl_tessellator_output_primitive   output_triangle_cw
+dcl_hs_max_tessfactor              64.f
+// Control point phase: copies inputs 0..2 of the control point selected by
+// r0.x (written by the udiv on vOutputControlPointID) to outputs o0..o2.
+hs_control_point_phase
+dcl_input v[4][0].xyzw
+dcl_input v[4][1].xy
+dcl_input v[4][2].xyz
+dcl_input vOutputControlPointID
+dcl_input vPrim
+dcl_output o0.xyzw
+dcl_output o1.xy
+dcl_output o2.xyz
+dcl_temps 1
+udiv NULL, r0.x, vOutputControlPointID, 4
+mov o0.xyzw, v[r0.x][0].xyzw
+mov o1.xy,   v[r0.x][1].xyxx
+mov o2.xyz,  v[r0.x][2].xyzx
+// First fork phase (4 instances): writes the .x component of o[0]..o[3],
+// declared as the four edge tess factors, indexed by vForkInstanceID.
+hs_fork_phase
+dcl_input vcp[4][0].xyzw
+dcl_input vcp[4][1].xy
+dcl_input vcp[4][2].xyz
+dcl_input vocp[32][0].xyzw
+dcl_input vocp[32][1].xy
+dcl_input vocp[32][2].xyz
+dcl_hs_fork_phase_instance_count 4
+dcl_input vForkInstanceID
+dcl_input vPrim
+dcl_indexRange o[0], o[3]
+dcl_temps 1
+dcl_indexableTemp x0[4], 1
+dcl_output_sv o0.x, finalQuadUeq0EdgeTessFactor
+dcl_output_sv o1.x, finalQuadVeq0EdgeTessFactor
+dcl_output_sv o2.x, finalQuadUeq1EdgeTessFactor
+dcl_output_sv o3.x, finalQuadVeq1EdgeTessFactor
+mov x0[0].x, 2.0f
+mov x0[1].x, 4.0f
+mov x0[2].x, 15.0f
+mov x0[3].x, 6.0f
+mov r0.x, vForkInstanceID
+mov o[r0.x].x, x0[r0.x].x
+// Second fork phase (4 instances): writes the .y component of o[0]..o[3],
+// indexed by vForkInstanceID.
+hs_fork_phase
+dcl_input vcp[4][0].xyzw
+dcl_input vcp[4][1].xy
+dcl_input vcp[4][2].xyz
+dcl_input vocp[32][0].xyzw
+dcl_input vocp[32][1].xy
+dcl_input vocp[32][2].xyz
+dcl_hs_fork_phase_instance_count 4
+dcl_input vForkInstanceID
+dcl_input vPrim
+dcl_indexRange o[0], o[3]
+dcl_temps 1
+dcl_indexableTemp x0[4], 1
+dcl_output o0.y
+dcl_output o1.y
+dcl_output o2.y
+dcl_output o3.y
+mov x0[0].x, 12.0f
+mov x0[1].x, 32.0f
+mov x0[2].x, 15.0f
+mov x0[3].x, 5.0f
+mov r0.x, vForkInstanceID
+mov o[r0.x].y, x0[r0.x].x
+// Join phase: writes constants to o4 and o5; the .x components are declared
+// as the two inside tess factors.
+hs_join_phase
+dcl_input vcp[4][0].xyzw
+dcl_input vcp[4][1].xy
+dcl_input vcp[4][2].xyz
+dcl_input vocp[32][0].xyzw
+dcl_input vocp[32][1].xy
+dcl_input vocp[32][2].xyz
+dcl_input vpc[0].xy
+dcl_input vpc[1].xy
+dcl_input vpc[2].xy
+dcl_input vpc[3].xy
+dcl_indexRange vpc[0], vpc[3]
+dcl_output_sv o4.x, finalQuadUInsideTessFactor
+dcl_output_sv o5.x, finalQuadVInsideTessFactor
+dcl_output o4.y
+dcl_output o5.y
+dcl_input vPrim
+mov o4.x, 12.0f
+mov o5.x, 6.0f
+mov o4.y, 0.0f
+mov o5.y, 0.0f

BIN
projects/dxilconv/test/dxbc2dxil-asm/hs3.dxbc


+ 159 - 0
projects/dxilconv/test/dxbc2dxil-asm/hs3.ref

@@ -0,0 +1,159 @@
+
+%dx.types.twoi32 = type { i32, i32 }
+
+define void @main() {
+entry:
+  %0 = call i32 @dx.op.outputControlPointID.i32(i32 107)
+  %1 = call %dx.types.twoi32 @dx.op.binaryWithTwoOuts.i32(i32 43, i32 %0, i32 4)
+  %2 = extractvalue %dx.types.twoi32 %1, 1
+  %3 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 %2)
+  %4 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 1, i32 %2)
+  %5 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 %2)
+  %6 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 3, i32 %2)
+  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 0, i32 %3)
+  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 1, i32 %4)
+  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 2, i32 %5)
+  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 3, i32 %6)
+  %7 = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 0, i32 %2)
+  %8 = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 1, i32 %2)
+  call void @dx.op.storeOutput.i32(i32 5, i32 1, i32 0, i8 0, i32 %7)
+  call void @dx.op.storeOutput.i32(i32 5, i32 1, i32 0, i8 1, i32 %8)
+  %9 = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 0, i8 0, i32 %2)
+  %10 = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 0, i8 1, i32 %2)
+  %11 = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 0, i8 2, i32 %2)
+  call void @dx.op.storeOutput.i32(i32 5, i32 2, i32 0, i8 0, i32 %9)
+  call void @dx.op.storeOutput.i32(i32 5, i32 2, i32 0, i8 1, i32 %10)
+  call void @dx.op.storeOutput.i32(i32 5, i32 2, i32 0, i8 2, i32 %11)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare %dx.types.twoi32 @dx.op.binaryWithTwoOuts.i32(i32, i32, i32) #0
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.outputControlPointID.i32(i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.tempRegStore.i32(i32, i32, i32) #1
+
+; Function Attrs: nounwind readonly
+declare i32 @dx.op.tempRegLoad.i32(i32, i32) #2
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.i32(i32, i32, i32, i8, i32) #1
+
+define void @pc_main() {
+entry:
+  %dx.v32.x01 = alloca [16 x i32], align 4
+  br label %hullloop0
+
+hullloop0:                                        ; preds = %hullloop0, %entry
+  %InstanceID.0 = phi i32 [ 0, %entry ], [ %10, %hullloop0 ]
+  %0 = getelementptr [16 x i32], [16 x i32]* %dx.v32.x01, i32 0, i32 0
+  store i32 1073741824, i32* %0, align 4
+  %1 = getelementptr [16 x i32], [16 x i32]* %dx.v32.x01, i32 0, i32 4
+  store i32 1082130432, i32* %1, align 4
+  %2 = getelementptr [16 x i32], [16 x i32]* %dx.v32.x01, i32 0, i32 8
+  store i32 1097859072, i32* %2, align 4
+  %3 = getelementptr [16 x i32], [16 x i32]* %dx.v32.x01, i32 0, i32 12
+  store i32 1086324736, i32* %3, align 4
+  %4 = mul i32 %InstanceID.0, 4
+  %5 = add i32 %4, 0
+  %6 = getelementptr [16 x i32], [16 x i32]* %dx.v32.x01, i32 0, i32 %5
+  %7 = load i32, i32* %6, align 4
+  %8 = call float @dx.op.bitcastI32toF32(i32 126, i32 %7)
+  %9 = sub i32 %InstanceID.0, 0
+  call void @dx.op.storePatchConstant.f32(i32 106, i32 0, i32 %9, i8 0, float %8)
+  %10 = add i32 %InstanceID.0, 1
+  %11 = icmp ult i32 %10, 4
+  br i1 %11, label %hullloop0, label %hullloop0.end
+
+hullloop0.end:                                    ; preds = %hullloop0
+  br label %hullloop1
+
+hullloop1:                                        ; preds = %hullloop1, %hullloop0.end
+  %InstanceID.1 = phi i32 [ 0, %hullloop0.end ], [ %21, %hullloop1 ]
+  %12 = getelementptr [16 x i32], [16 x i32]* %dx.v32.x01, i32 0, i32 0
+  store i32 1094713344, i32* %12, align 4
+  %13 = getelementptr [16 x i32], [16 x i32]* %dx.v32.x01, i32 0, i32 4
+  store i32 1107296256, i32* %13, align 4
+  %14 = getelementptr [16 x i32], [16 x i32]* %dx.v32.x01, i32 0, i32 8
+  store i32 1097859072, i32* %14, align 4
+  %15 = getelementptr [16 x i32], [16 x i32]* %dx.v32.x01, i32 0, i32 12
+  store i32 1084227584, i32* %15, align 4
+  %16 = mul i32 %InstanceID.1, 4
+  %17 = add i32 %16, 0
+  %18 = getelementptr [16 x i32], [16 x i32]* %dx.v32.x01, i32 0, i32 %17
+  %19 = load i32, i32* %18, align 4
+  %20 = sub i32 %InstanceID.1, 0
+  call void @dx.op.storePatchConstant.i32(i32 106, i32 1, i32 %20, i8 0, i32 %19)
+  %21 = add i32 %InstanceID.1, 1
+  %22 = icmp ult i32 %21, 4
+  br i1 %22, label %hullloop1, label %hullloop1.end
+
+hullloop1.end:                                    ; preds = %hullloop1
+  br label %hullloop2
+
+hullloop2:                                        ; preds = %hullloop2, %hullloop1.end
+  %InstanceID.2 = phi i32 [ 0, %hullloop1.end ], [ %23, %hullloop2 ]
+  call void @dx.op.storePatchConstant.f32(i32 106, i32 5, i32 0, i8 0, float 1.200000e+01)
+  call void @dx.op.storePatchConstant.f32(i32 106, i32 7, i32 0, i8 0, float 6.000000e+00)
+  call void @dx.op.storePatchConstant.i32(i32 106, i32 6, i32 0, i8 0, i32 0)
+  call void @dx.op.storePatchConstant.i32(i32 106, i32 8, i32 0, i8 0, i32 0)
+  %23 = add i32 %InstanceID.2, 1
+  %24 = icmp ult i32 %23, 1
+  br i1 %24, label %hullloop2, label %hullloop2.end
+
+hullloop2.end:                                    ; preds = %hullloop2
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare float @dx.op.bitcastI32toF32(i32, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.storePatchConstant.f32(i32, i32, i32, i8, float) #1
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.bitcastF32toI32(i32, float) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.storePatchConstant.i32(i32, i32, i32, i8, i32) #1
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind readonly }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.entryPoints = !{!2}
+!llvm.ident = !{!23}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"hs", i32 6, i32 0}
+!2 = !{void ()* @main, !"main", !3, null, !21}
+!3 = !{!4, !4, !9}
+!4 = !{!5, !7, !8}
+!5 = !{i32 0, !"0_", i8 5, i8 0, !6, i8 0, i32 1, i8 4, i32 0, i8 0, null}
+!6 = !{i32 0}
+!7 = !{i32 1, !"1_", i8 5, i8 0, !6, i8 0, i32 1, i8 2, i32 1, i8 0, null}
+!8 = !{i32 2, !"2_", i8 5, i8 0, !6, i8 0, i32 1, i8 3, i32 2, i8 0, null}
+!9 = !{!10, !12, !13, !14, !15, !16, !17, !18, !20}
+!10 = !{i32 0, !"SV_TessFactor", i8 9, i8 25, !11, i8 0, i32 4, i8 1, i32 0, i8 0, null}
+!11 = !{i32 0, i32 1, i32 2, i32 3}
+!12 = !{i32 1, !"0_", i8 5, i8 0, !6, i8 0, i32 1, i8 1, i32 0, i8 1, null}
+!13 = !{i32 2, !"1_", i8 5, i8 0, !6, i8 0, i32 1, i8 1, i32 1, i8 1, null}
+!14 = !{i32 3, !"2_", i8 5, i8 0, !6, i8 0, i32 1, i8 1, i32 2, i8 1, null}
+!15 = !{i32 4, !"3_", i8 5, i8 0, !6, i8 0, i32 1, i8 1, i32 3, i8 1, null}
+!16 = !{i32 5, !"SV_InsideTessFactor", i8 9, i8 26, !6, i8 0, i32 1, i8 1, i32 4, i8 0, null}
+!17 = !{i32 6, !"4_", i8 5, i8 0, !6, i8 0, i32 1, i8 1, i32 4, i8 1, null}
+!18 = !{i32 7, !"SV_InsideTessFactor", i8 9, i8 26, !19, i8 0, i32 1, i8 1, i32 5, i8 0, null}
+!19 = !{i32 1}
+!20 = !{i32 8, !"5_", i8 5, i8 0, !6, i8 0, i32 1, i8 1, i32 5, i8 1, null}
+!21 = !{i32 0, i64 258, i32 3, !22}
+!22 = !{void ()* @pc_main, i32 4, i32 32, i32 3, i32 3, i32 3, float 6.400000e+01}
+!23 = !{!"dxbc2dxil 1.2"}

+ 19 - 0
projects/dxilconv/test/dxbc2dxil-asm/indexabletemp4.asm

@@ -0,0 +1,19 @@
+/*// RUN: %testasm %s /Fo %t.dxbc*/
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+// x0 is declared as 4 registers of 2 components; the final mov indexes x0 with a value read back from x0 itself (x0[1].y + 77).
+ps_5_0
+dcl_globalFlags refactoringAllowed
+dcl_constantbuffer cb0[12], dynamicIndexed
+dcl_input_ps constant v1.x
+dcl_input_ps constant v1.y
+dcl_output o0.x
+dcl_temps 1
+dcl_indexableTemp x0[4], 2
+mov r0.x, v1.x
+mov x0[0].x, cb0[r0.x + 0].x
+mov x0[1].x, cb0[r0.x + 4].x
+mov r0.x, v1.y
+mov x0[1].y, r0.x
+mov o0.x, x0[ x0[1].y + 77 ].x
+ret

BIN
projects/dxilconv/test/dxbc2dxil-asm/indexabletemp4.dxbc


+ 82 - 0
projects/dxilconv/test/dxbc2dxil-asm/indexabletemp4.ref

@@ -0,0 +1,82 @@
+
+%dx.types.Handle = type { i8* }
+%dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
+%dx.types.i8x192 = type { [192 x i8] }
+
+define void @main() {
+entry:
+  %dx.v32.x01 = alloca [8 x i32], align 4
+  %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 0, i1 false)
+  %1 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %2 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %0, i32 %1)
+  %3 = extractvalue %dx.types.CBufRet.i32 %2, 0
+  %4 = getelementptr [8 x i32], [8 x i32]* %dx.v32.x01, i32 0, i32 0
+  store i32 %3, i32* %4, align 4
+  %5 = add i32 %1, 4
+  %6 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %0, i32 %5)
+  %7 = extractvalue %dx.types.CBufRet.i32 %6, 0
+  %8 = getelementptr [8 x i32], [8 x i32]* %dx.v32.x01, i32 0, i32 2
+  store i32 %7, i32* %8, align 4
+  %9 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 1, i32 undef)
+  %10 = getelementptr [8 x i32], [8 x i32]* %dx.v32.x01, i32 0, i32 3
+  store i32 %9, i32* %10, align 4
+  %11 = getelementptr [8 x i32], [8 x i32]* %dx.v32.x01, i32 0, i32 3
+  %12 = load i32, i32* %11, align 4
+  %13 = add i32 %12, 77
+  %14 = mul i32 %13, 2
+  %15 = add i32 %14, 0
+  %16 = getelementptr [8 x i32], [8 x i32]* %dx.v32.x01, i32 0, i32 %15
+  %17 = load i32, i32* %16, align 4
+  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 0, i32 %17)
+  ret void
+}
+
+; Function Attrs: nounwind readonly
+declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #0
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #1
+
+; Function Attrs: nounwind
+declare void @dx.op.tempRegStore.i32(i32, i32, i32) #2
+
+; Function Attrs: nounwind readonly
+declare i32 @dx.op.tempRegLoad.i32(i32, i32) #0
+
+; Function Attrs: nounwind readonly
+declare %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32, %dx.types.Handle, i32) #0
+
+; Function Attrs: nounwind readnone
+declare float @dx.op.bitcastI32toF32(i32, i32) #1
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.bitcastF32toI32(i32, float) #1
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.i32(i32, i32, i32, i8, i32) #2
+
+attributes #0 = { nounwind readonly }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.resources = !{!2}
+!dx.entryPoints = !{!5}
+!llvm.ident = !{!13}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"ps", i32 6, i32 0}
+!2 = !{null, null, !3, null}
+!3 = !{!4}
+!4 = !{i32 0, %dx.types.i8x192 addrspace(2)* undef, !"CB0", i32 0, i32 0, i32 1, i32 192, null}
+!5 = !{void ()* @main, !"main", !6, !2, !12}
+!6 = !{!7, !10, null}
+!7 = !{!8}
+!8 = !{i32 0, !"1_", i8 5, i8 0, !9, i8 1, i32 1, i8 2, i32 1, i8 0, null}
+!9 = !{i32 0}
+!10 = !{!11}
+!11 = !{i32 0, !"SV_Target", i8 5, i8 16, !9, i8 0, i32 1, i8 1, i32 0, i8 0, null}
+!12 = !{i32 0, i64 256}
+!13 = !{!"dxbc2dxil 1.2"}

+ 63 - 0
projects/dxilconv/test/dxbc2dxil-asm/indexabletemp6.asm

@@ -0,0 +1,63 @@
+/*// RUN: %testasm %s /allowMinimumPrecision /Fo %t.dxbc*/
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+//
+// Generated by Microsoft (R) HLSL Shader Compiler 10.1 INTERNAL
+//
+//
+// Note: shader requires additional functionality:
+//       Minimum-precision data types
+//
+//
+// Buffer Definitions:
+//
+// cbuffer $Globals
+// {
+//
+//   min16float g1[4];                  // Offset:    0 Size:    52
+//   min16float g2[8];                  // Offset:   64 Size:   116
+//
+// }
+//
+//
+// Resource Bindings:
+//
+// Name                                 Type  Format         Dim      HLSL Bind  Count
+// ------------------------------ ---------- ------- ----------- -------------- ------
+// $Globals                          cbuffer      NA          NA            cb0      1
+//
+//
+//
+// Input signature:
+//
+// Name                 Index   Mask Register SysValue  Format   Used
+// -------------------- ----- ------ -------- -------- ------- ------
+// A                        0   xyzw        0     NONE  min16f
+// B                        0   x           1     NONE     int   x
+// C                        0    y          1     NONE     int    y
+//
+//
+// Output signature:
+//
+// Name                 Index   Mask Register SysValue  Format   Used
+// -------------------- ----- ------ -------- -------- ------- ------
+// SV_TARGET                0   x           0   TARGET  min16f   x
+// Note: x0[0].x is written first as a 32-bit value and then with {min16f}; r0.y is written as 32-bit (from x0[0].x) and later read as {min16f} by the add.
+ps_5_0
+dcl_globalFlags refactoringAllowed | enableMinimumPrecision
+dcl_constantbuffer cb0[12], dynamicIndexed
+dcl_input_ps constant v1.x
+dcl_input_ps constant v1.y
+dcl_output o0.x {min16f}
+dcl_temps 1
+dcl_indexableTemp x0[4], 4
+mov r0.x, v1.x
+mov x0[0].x, cb0[r0.x + 4].x
+mov r0.y, x0[0].x
+mov x0[0].x {min16f}, cb0[r0.x + 0].x {min16f}
+mov x0[1].x {min16f}, cb0[r0.x + 4].x {min16f}
+mov r0.x, v1.y
+add x0[r0.x + 0].x {min16f}, x0[r0.x + 0].x {min16f}, r0.y {min16f}
+mov o0.x {min16f}, x0[r0.x + 0].x {min16f}
+ret

BIN
projects/dxilconv/test/dxbc2dxil-asm/indexabletemp6.dxbc


+ 103 - 0
projects/dxilconv/test/dxbc2dxil-asm/indexabletemp6.ref

@@ -0,0 +1,103 @@
+
+%dx.types.Handle = type { i8* }
+%dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
+%dx.types.CBufRet.f32 = type { float, float, float, float }
+%dx.types.i8x192 = type { [192 x i8] }
+
+define void @main() {
+entry:
+  %dx.v32.x01 = alloca [16 x i32], align 4
+  %dx.v16.x0 = alloca [16 x half], align 4
+  %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 0, i1 false)
+  %1 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %2 = add i32 %1, 4
+  %3 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %0, i32 %2)
+  %4 = extractvalue %dx.types.CBufRet.i32 %3, 0
+  %5 = getelementptr [16 x i32], [16 x i32]* %dx.v32.x01, i32 0, i32 0
+  store i32 %4, i32* %5, align 4
+  %6 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %0, i32 %1)
+  %7 = extractvalue %dx.types.CBufRet.f32 %6, 0
+  %8 = getelementptr [16 x half], [16 x half]* %dx.v16.x0, i32 0, i32 0
+  %9 = fptrunc float %7 to half
+  store half %9, half* %8, align 2
+  %10 = add i32 %1, 4
+  %11 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %0, i32 %10)
+  %12 = extractvalue %dx.types.CBufRet.f32 %11, 0
+  %13 = getelementptr [16 x half], [16 x half]* %dx.v16.x0, i32 0, i32 4
+  %14 = fptrunc float %12 to half
+  store half %14, half* %13, align 2
+  %15 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 1, i32 undef)
+  %16 = mul i32 %15, 4
+  %17 = add i32 %16, 0
+  %18 = getelementptr [16 x half], [16 x half]* %dx.v16.x0, i32 0, i32 %17
+  %19 = load half, half* %18, align 2
+  %20 = fadd fast half %19, undef
+  %21 = mul i32 %15, 4
+  %22 = add i32 %21, 0
+  %23 = getelementptr [16 x half], [16 x half]* %dx.v16.x0, i32 0, i32 %22
+  store half %20, half* %23, align 2
+  %24 = mul i32 %15, 4
+  %25 = add i32 %24, 0
+  %26 = getelementptr [16 x half], [16 x half]* %dx.v16.x0, i32 0, i32 %25
+  %27 = load half, half* %26, align 2
+  %28 = fpext half %27 to float
+  %29 = call i32 @dx.op.bitcastF32toI32(i32 127, float %28)
+  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 0, i32 %29)
+  ret void
+}
+
+; Function Attrs: nounwind readonly
+declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #0
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #1
+
+; Function Attrs: nounwind
+declare void @dx.op.tempRegStore.i32(i32, i32, i32) #2
+
+; Function Attrs: nounwind readonly
+declare i32 @dx.op.tempRegLoad.i32(i32, i32) #0
+
+; Function Attrs: nounwind readonly
+declare %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32, %dx.types.Handle, i32) #0
+
+; Function Attrs: nounwind readnone
+declare float @dx.op.bitcastI32toF32(i32, i32) #1
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.bitcastF32toI32(i32, float) #1
+
+; Function Attrs: nounwind readonly
+declare %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32, %dx.types.Handle, i32) #0
+
+; Function Attrs: nounwind readonly
+declare half @dx.op.tempRegLoad.f16(i32, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.i32(i32, i32, i32, i8, i32) #2
+
+attributes #0 = { nounwind readonly }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.resources = !{!2}
+!dx.entryPoints = !{!5}
+!llvm.ident = !{!13}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"ps", i32 6, i32 0}
+!2 = !{null, null, !3, null}
+!3 = !{!4}
+!4 = !{i32 0, %dx.types.i8x192 addrspace(2)* undef, !"CB0", i32 0, i32 0, i32 1, i32 192, null}
+!5 = !{void ()* @main, !"main", !6, !2, !12}
+!6 = !{!7, !10, null}
+!7 = !{!8}
+!8 = !{i32 0, !"1_", i8 5, i8 0, !9, i8 1, i32 1, i8 2, i32 1, i8 0, null}
+!9 = !{i32 0}
+!10 = !{!11}
+!11 = !{i32 0, !"SV_Target", i8 5, i8 16, !9, i8 0, i32 1, i8 1, i32 0, i8 0, null}
+!12 = !{i32 0, i64 288}
+!13 = !{!"dxbc2dxil 1.2"}

+ 11 - 0
projects/dxilconv/test/dxbc2dxil/abs1.hlsl

@@ -0,0 +1,11 @@
+// RUN: %fxc /T ps_5_0 %s /Fo %t.dxbc
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+
+// Pixel shader entry point: applies abs() to float input A swizzled as (y, x, x, x),
+// i.e. |A.y| in the first output component and |A.x| in the remaining three.
+float4 main(float4 a : A) : SV_TARGET
+{
+  return abs(a.yxxx);
+}

+ 43 - 0
projects/dxilconv/test/dxbc2dxil/abs1.ref

@@ -0,0 +1,43 @@
+
+define void @main() {
+entry:
+  %0 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)
+  %1 = call float @dx.op.unary.f32(i32 6, float %0)
+  %2 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %3 = call float @dx.op.unary.f32(i32 6, float %2)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %1)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %3)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %3)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %3)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #0
+
+; Function Attrs: nounwind readnone
+declare float @dx.op.unary.f32(i32, float) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #1
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.entryPoints = !{!2}
+!llvm.ident = !{!10}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"ps", i32 6, i32 0}
+!2 = !{void ()* @main, !"main", !3, null, !9}
+!3 = !{!4, !7, null}
+!4 = !{!5}
+!5 = !{i32 0, !"A", i8 9, i8 0, !6, i8 2, i32 1, i8 4, i32 0, i8 0, null}
+!6 = !{i32 0}
+!7 = !{!8}
+!8 = !{i32 0, !"SV_Target", i8 9, i8 16, !6, i8 0, i32 1, i8 4, i32 0, i8 0, null}
+!9 = !{i32 0, i64 256}
+!10 = !{!"dxbc2dxil 1.2"}

+ 11 - 0
projects/dxilconv/test/dxbc2dxil/abs2.hlsl

@@ -0,0 +1,11 @@
+// RUN: %fxc /T ps_5_0 %s /Fo %t.dxbc
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+
+// Pixel shader entry point: integer counterpart of abs1.hlsl. Applies abs() to int
+// input A swizzled as (y, x, x, x) and returns the result as an int4.
+int4 main(int4 a : A) : SV_TARGET
+{
+  return abs(a.yxxx);
+}

+ 49 - 0
projects/dxilconv/test/dxbc2dxil/abs2.ref

@@ -0,0 +1,49 @@
+
+define void @main() {
+entry:
+  %0 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 1, i32 undef)
+  %1 = sub i32 0, %0
+  %2 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %3 = sub i32 0, %2
+  %4 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 1, i32 undef)
+  %5 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %6 = call i32 @dx.op.binary.i32(i32 37, i32 %1, i32 %4)
+  %7 = call i32 @dx.op.binary.i32(i32 37, i32 %3, i32 %5)
+  %8 = call i32 @dx.op.binary.i32(i32 37, i32 %3, i32 %5)
+  %9 = call i32 @dx.op.binary.i32(i32 37, i32 %3, i32 %5)
+  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 0, i32 %6)
+  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 1, i32 %7)
+  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 2, i32 %8)
+  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 3, i32 %9)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.binary.i32(i32, i32, i32) #0
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.i32(i32, i32, i32, i8, i32) #1
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.entryPoints = !{!2}
+!llvm.ident = !{!10}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"ps", i32 6, i32 0}
+!2 = !{void ()* @main, !"main", !3, null, !9}
+!3 = !{!4, !7, null}
+!4 = !{!5}
+!5 = !{i32 0, !"A", i8 4, i8 0, !6, i8 1, i32 1, i8 4, i32 0, i8 0, null}
+!6 = !{i32 0}
+!7 = !{!8}
+!8 = !{i32 0, !"SV_Target", i8 4, i8 16, !6, i8 0, i32 1, i8 4, i32 0, i8 0, null}
+!9 = !{i32 0, i64 256}
+!10 = !{!"dxbc2dxil 1.2"}

+ 61 - 0
projects/dxilconv/test/dxbc2dxil/atomics.hlsl

@@ -0,0 +1,61 @@
+// RUN: %fxc /T ps_5_1 %s /Fo %t.dxbc
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+
+
+
+RWBuffer<uint> buf0;
+RWByteAddressBuffer buf1;
+RWByteAddressBuffer buf2[4][7];
+RWTexture2D<uint> tex0;
+RWTexture3D<uint> tex1;
+RWTexture2DArray<uint> tex2;
+RWTexture3D<uint> tex3[8][4];
+
+#define RS "DescriptorTable(" \
+             "UAV(u0), "\
+             "UAV(u1), "\
+             "UAV(u2, numDescriptors=28), "\
+             "UAV(u30), "\
+             "UAV(u31), "\
+             "UAV(u32), "\
+             "UAV(u33, numDescriptors=32) "\
+             ")"\
+
+[RootSignature( RS )]
+// Pixel shader entry point: issues Interlocked* operations on each of the UAVs declared above; input b is never read.
+float4 main(uint4 a : A, float4 b : B) : SV_Target
+{
+    uint4 r = a;
+    uint comparevalue = r.w;  // compare operand for the CompareExchange calls
+    uint newvalue = r.z;      // value operand; updated as the shader proceeds
+    uint origvalue;           // receives the pre-operation value from the out-parameter forms
+
+    InterlockedAdd(buf0[r.z], newvalue);  // two-argument forms: no original value requested
+    InterlockedMin(buf0[r.z], newvalue);
+    InterlockedMax(buf0[r.z], newvalue);
+    InterlockedAnd(buf0[r.z], newvalue);
+    InterlockedOr (buf0[r.z], newvalue);
+    InterlockedXor(buf0[r.z], newvalue);
+
+    InterlockedAdd(buf0[r.z], newvalue, origvalue); newvalue += origvalue;  // original value feeds the later operations
+    InterlockedAdd(tex0[r.xy], newvalue);   // 2D texture: two coordinates
+    InterlockedAdd(tex1[r.ywz], newvalue);  // 3D texture: three coordinates, swizzled
+    InterlockedAdd(tex2[r.xyz], newvalue);  // 2D texture array: three coordinates
+    InterlockedAdd(tex3[r.x][1][r.xyz], newvalue);  // resource array with a dynamic first index (r.x)
+
+    InterlockedCompareExchange(buf0[r.z], comparevalue, newvalue, origvalue); newvalue += origvalue;
+
+    buf1.InterlockedAdd(r.x, r.z, newvalue); // args: address (r.x), value to add (r.z), out original value -- written into newvalue
+    buf2[2][r.y].InterlockedAdd(r.x, r.z, newvalue);  // same call on a dynamically indexed element of buf2
+    buf1.InterlockedMin(r.x, r.z, newvalue);  // newvalue is again the output argument in each of these calls
+    buf1.InterlockedMax(r.x, r.z, newvalue);
+    buf1.InterlockedAnd(r.x, r.z, newvalue);
+    buf1.InterlockedOr (r.x, r.z, newvalue);
+    buf1.InterlockedXor(r.x, r.z, newvalue);
+    buf1.InterlockedExchange(r.z, newvalue, origvalue); newvalue += origvalue;
+    buf1.InterlockedCompareExchange(r.z, comparevalue, newvalue, origvalue); newvalue += origvalue;
+
+    return newvalue;  // uint scalar returned through the float4 return type
+}

+ 169 - 0
projects/dxilconv/test/dxbc2dxil/atomics.ref

@@ -0,0 +1,169 @@
+
+%dx.types.Handle = type { i8* }
+%dx.types.u32 = type { i32 }
+
+define void @main() {
+entry:
+  %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)
+  %1 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %2 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %3 = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %0, i32 0, i32 %1, i32 undef, i32 undef, i32 %2)
+  %4 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)
+  %5 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %6 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %7 = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %4, i32 6, i32 %5, i32 undef, i32 undef, i32 %6)
+  %8 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)
+  %9 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %10 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %11 = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %8, i32 7, i32 %9, i32 undef, i32 undef, i32 %10)
+  %12 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)
+  %13 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %14 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %15 = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %12, i32 1, i32 %13, i32 undef, i32 undef, i32 %14)
+  %16 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)
+  %17 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %18 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %19 = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %16, i32 2, i32 %17, i32 undef, i32 undef, i32 %18)
+  %20 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)
+  %21 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %22 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %23 = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %20, i32 3, i32 %21, i32 undef, i32 undef, i32 %22)
+  %24 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)
+  %25 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %26 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %27 = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %24, i32 0, i32 %25, i32 undef, i32 undef, i32 %26)
+  %28 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %29 = add i32 %27, %28
+  %30 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 3, i32 30, i1 false)
+  %31 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %32 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 1, i32 undef)
+  %33 = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %30, i32 0, i32 %31, i32 %32, i32 undef, i32 %29)
+  %34 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 4, i32 31, i1 false)
+  %35 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 1, i32 undef)
+  %36 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 3, i32 undef)
+  %37 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %38 = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %34, i32 0, i32 %35, i32 %36, i32 %37, i32 %29)
+  %39 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 5, i32 32, i1 false)
+  %40 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %41 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 1, i32 undef)
+  %42 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %43 = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %39, i32 0, i32 %40, i32 %41, i32 %42, i32 %29)
+  %44 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %45 = call i32 @dx.op.quaternary.i32(i32 53, i32 30, i32 2, i32 %44, i32 1)
+  %46 = add i32 %45, 33
+  %47 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 6, i32 %46, i1 false)
+  %48 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %49 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 1, i32 undef)
+  %50 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %51 = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %47, i32 0, i32 %48, i32 %49, i32 %50, i32 %29)
+  %52 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)
+  %53 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %54 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 3, i32 undef)
+  %55 = call i32 @dx.op.atomicCompareExchange.i32(i32 79, %dx.types.Handle %52, i32 %53, i32 undef, i32 undef, i32 %54, i32 %29)
+  %56 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 1, i1 false)
+  %57 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %58 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %59 = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %56, i32 0, i32 %57, i32 undef, i32 undef, i32 %58)
+  %60 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 1, i32 undef)
+  %61 = add i32 %60, 14
+  %62 = add i32 %61, 2
+  %63 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 2, i32 %62, i1 false)
+  %64 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %65 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %66 = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %63, i32 0, i32 %64, i32 undef, i32 undef, i32 %65)
+  %67 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 1, i1 false)
+  %68 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %69 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %70 = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %67, i32 6, i32 %68, i32 undef, i32 undef, i32 %69)
+  %71 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 1, i1 false)
+  %72 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %73 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %74 = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %71, i32 7, i32 %72, i32 undef, i32 undef, i32 %73)
+  %75 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 1, i1 false)
+  %76 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %77 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %78 = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %75, i32 1, i32 %76, i32 undef, i32 undef, i32 %77)
+  %79 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 1, i1 false)
+  %80 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %81 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %82 = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %79, i32 2, i32 %80, i32 undef, i32 undef, i32 %81)
+  %83 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 1, i1 false)
+  %84 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %85 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %86 = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %83, i32 3, i32 %84, i32 undef, i32 undef, i32 %85)
+  %87 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 1, i1 false)
+  %88 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %89 = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %87, i32 8, i32 %88, i32 undef, i32 undef, i32 %86)
+  %90 = add i32 %86, %89
+  %91 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 1, i1 false)
+  %92 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %93 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 3, i32 undef)
+  %94 = call i32 @dx.op.atomicCompareExchange.i32(i32 79, %dx.types.Handle %91, i32 %92, i32 undef, i32 undef, i32 %93, i32 %90)
+  %95 = add i32 %90, %94
+  %96 = uitofp i32 %95 to float
+  %97 = uitofp i32 %95 to float
+  %98 = uitofp i32 %95 to float
+  %99 = uitofp i32 %95 to float
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %96)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %97)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %98)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %99)
+  ret void
+}
+
+; Function Attrs: nounwind readonly
+declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #0
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #1
+
+; Function Attrs: nounwind
+declare i32 @dx.op.atomicBinOp.i32(i32, %dx.types.Handle, i32, i32, i32, i32, i32) #2
+
+; Function Attrs: nounwind
+declare void @dx.op.tempRegStore.i32(i32, i32, i32) #2
+
+; Function Attrs: nounwind readonly
+declare i32 @dx.op.tempRegLoad.i32(i32, i32) #0
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.quaternary.i32(i32, i32, i32, i32, i32) #1
+
+; Function Attrs: nounwind
+declare i32 @dx.op.atomicCompareExchange.i32(i32, %dx.types.Handle, i32, i32, i32, i32, i32) #2
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #2
+
+attributes #0 = { nounwind readonly }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.resources = !{!2}
+!dx.entryPoints = !{!12}
+!llvm.ident = !{!20}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"ps", i32 6, i32 0}
+!2 = !{null, !3, null, null}
+!3 = !{!4, !6, !7, !8, !9, !10, !11}
+!4 = !{i32 0, %dx.types.u32 addrspace(1)* undef, !"U0", i32 0, i32 0, i32 1, i32 10, i1 false, i1 false, i1 false, !5}
+!5 = !{i32 0, i32 5}
+!6 = !{i32 1, %dx.types.u32 addrspace(1)* undef, !"U1", i32 0, i32 1, i32 1, i32 11, i1 false, i1 false, i1 false, null}
+!7 = !{i32 2, %dx.types.u32 addrspace(1)* undef, !"U2", i32 0, i32 2, i32 28, i32 11, i1 false, i1 false, i1 false, null}
+!8 = !{i32 3, %dx.types.u32 addrspace(1)* undef, !"U3", i32 0, i32 30, i32 1, i32 2, i1 false, i1 false, i1 false, !5}
+!9 = !{i32 4, %dx.types.u32 addrspace(1)* undef, !"U4", i32 0, i32 31, i32 1, i32 4, i1 false, i1 false, i1 false, !5}
+!10 = !{i32 5, %dx.types.u32 addrspace(1)* undef, !"U5", i32 0, i32 32, i32 1, i32 7, i1 false, i1 false, i1 false, !5}
+!11 = !{i32 6, %dx.types.u32 addrspace(1)* undef, !"U6", i32 0, i32 33, i32 32, i32 4, i1 false, i1 false, i1 false, !5}
+!12 = !{void ()* @main, !"main", !13, !2, null}
+!13 = !{!14, !18, null}
+!14 = !{!15, !17}
+!15 = !{i32 0, !"A", i8 5, i8 0, !16, i8 1, i32 1, i8 4, i32 0, i8 0, null}
+!16 = !{i32 0}
+!17 = !{i32 1, !"B", i8 9, i8 0, !16, i8 0, i32 1, i8 4, i32 1, i8 0, null}
+!18 = !{!19}
+!19 = !{i32 0, !"SV_Target", i8 9, i8 16, !16, i8 0, i32 1, i8 4, i32 0, i8 0, null}
+!20 = !{!"dxbc2dxil 1.2"}

+ 32 - 0
projects/dxilconv/test/dxbc2dxil/bad_ftoi.hlsl

@@ -0,0 +1,32 @@
+// RUN: %fxc /T vs_5_0 /Od %s /Fo %t.dxbc
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+int4 main() : OUTPUT  // converts float literals far outside the 32-bit integer range; expected values are listed below
+{
+  return int4(340282346638528860000000000000000000000.0,   // .x: float -> int, expected 2147483647
+              -340282346638528860000000000000000000000.0,  // .y: float -> int, expected -2147483648
+              asint((uint)340282346638528860000000000000000000000.0),   // .z: float -> uint, reinterpreted as int, expected -1
+              asint((uint)-340282346638528860000000000000000000000.0)); // .w: float -> uint, reinterpreted as int, expected 0
+}
+
+// fxc produces:
+// -> ftou o0.z, l(340282346638528860000000000000000000000.000000)
+// -> ftou o0.w, l(-340282346638528860000000000000000000000.000000)
+// -> ftoi o0.xy, l(340282346638528860000000000000000000000.000000, -340282346638528860000000000000000000000.000000, 0.000000, 0.000000)
+
+// dxbc2dxil used to produce:
+// -> call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 2, i32 undef)
+// -> call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 3, i32 undef)
+// -> call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 0, i32 undef)
+// -> call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 1, i32 undef)
+// "i32 undef" is invalid here.  It's caused by:
+//   return of opInvalidOp from APFloat::convertToSignExtendedInteger
+//   which llvm::ConstantFoldCastInstruction turns into i32 undef
+
+// fixed dxbc2dxil produces:
+// -> call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 2, i32 -1)
+// -> call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 3, i32 0)
+// -> call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 0, i32 2147483647)
+// -> call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 1, i32 -2147483648)
+// Which is int4(max int, min int, max uint, min uint)

+ 30 - 0
projects/dxilconv/test/dxbc2dxil/bad_ftoi.ref

@@ -0,0 +1,30 @@
+
+define void @main() {
+entry:
+  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 2, i32 -1)
+  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 3, i32 0)
+  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 0, i32 2147483647)
+  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 1, i32 -2147483648)
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.i32(i32, i32, i32, i8, i32) #0
+
+attributes #0 = { nounwind }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.entryPoints = !{!2}
+!llvm.ident = !{!8}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"vs", i32 6, i32 0}
+!2 = !{void ()* @main, !"main", !3, null, !7}
+!3 = !{null, !4, null}
+!4 = !{!5}
+!5 = !{i32 0, !"OUTPUT", i8 4, i8 0, !6, i8 0, i32 1, i8 4, i32 0, i8 0, null}
+!6 = !{i32 0}
+!7 = !{i32 0, i64 257}
+!8 = !{!"dxbc2dxil 1.2"}

+ 17 - 0
projects/dxilconv/test/dxbc2dxil/binary1.hlsl

@@ -0,0 +1,17 @@
+// RUN: %fxc /T ps_5_0 %s /Fo %t.dxbc
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+
+
+
+float main(float a : A, float b : B, float2 c : C) : SV_Target  // chains float add, divide, multiply, max and min
+{
+  float r = a;
+  r += a;           // binary1.ref: fadd fast
+  r /= a;           // binary1.ref: fdiv fast
+  r *= b;           // binary1.ref: fmul fast
+  r = max(r, c.x);  // binary1.ref: dx.op.binary.f32 with opcode 35
+  r = min(r, c.y);  // binary1.ref: dx.op.binary.f32 with opcode 36
+  return r;
+}

+ 56 - 0
projects/dxilconv/test/dxbc2dxil/binary1.ref

@@ -0,0 +1,56 @@
+
+define void @main() {
+entry:
+  %0 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %1 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %2 = fadd fast float %0, %1
+  %3 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %4 = fdiv fast float %2, %3
+  %5 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 undef)
+  %6 = fmul fast float %4, %5
+  %7 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 0, i32 undef)
+  %8 = call float @dx.op.binary.f32(i32 35, float %6, float %7)
+  %9 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 1, i32 undef)
+  %10 = call float @dx.op.binary.f32(i32 36, float %8, float %9)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %10)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.tempRegStore.f32(i32, i32, float) #1
+
+; Function Attrs: nounwind readonly
+declare float @dx.op.tempRegLoad.f32(i32, i32) #2
+
+; Function Attrs: nounwind readnone
+declare float @dx.op.binary.f32(i32, float, float) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #1
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind readonly }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.entryPoints = !{!2}
+!llvm.ident = !{!12}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"ps", i32 6, i32 0}
+!2 = !{void ()* @main, !"main", !3, null, !11}
+!3 = !{!4, !9, null}
+!4 = !{!5, !7, !8}
+!5 = !{i32 0, !"A", i8 9, i8 0, !6, i8 2, i32 1, i8 1, i32 0, i8 0, null}
+!6 = !{i32 0}
+!7 = !{i32 1, !"B", i8 9, i8 0, !6, i8 2, i32 1, i8 1, i32 0, i8 1, null}
+!8 = !{i32 2, !"C", i8 9, i8 0, !6, i8 2, i32 1, i8 2, i32 0, i8 2, null}
+!9 = !{!10}
+!10 = !{i32 0, !"SV_Target", i8 9, i8 16, !6, i8 0, i32 1, i8 1, i32 0, i8 0, null}
+!11 = !{i32 0, i64 256}
+!12 = !{!"dxbc2dxil 1.2"}

+ 11 - 0
projects/dxilconv/test/dxbc2dxil/bool1.hlsl

@@ -0,0 +1,11 @@
+// RUN: %fxc /T ps_5_0 %s /Fo %t.dxbc
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+
+
+
+int main(uint a : A) : SV_Target  // returns firstbithigh of a uint input
+{
+    return firstbithigh(a);  // bool1.ref: dx.op.unaryBits (opcode 33), then 31 - result, with -1 selected when a == 0
+}

+ 50 - 0
projects/dxilconv/test/dxbc2dxil/bool1.ref

@@ -0,0 +1,50 @@
+
+define void @main() {
+entry:
+  %0 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %1 = call i32 @dx.op.unaryBits.i32(i32 33, i32 %0)
+  %2 = sub i32 0, %1
+  %3 = add i32 %2, 31
+  %4 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %5 = icmp ne i32 %4, 0
+  %6 = select i1 %5, i32 %3, i32 -1
+  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 0, i32 %6)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.unaryBits.i32(i32, i32) #0
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.tempRegStore.i32(i32, i32, i32) #1
+
+; Function Attrs: nounwind readonly
+declare i32 @dx.op.tempRegLoad.i32(i32, i32) #2
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.i32(i32, i32, i32, i8, i32) #1
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind readonly }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.entryPoints = !{!2}
+!llvm.ident = !{!10}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"ps", i32 6, i32 0}
+!2 = !{void ()* @main, !"main", !3, null, !9}
+!3 = !{!4, !7, null}
+!4 = !{!5}
+!5 = !{i32 0, !"A", i8 5, i8 0, !6, i8 1, i32 1, i8 1, i32 0, i8 0, null}
+!6 = !{i32 0}
+!7 = !{!8}
+!8 = !{i32 0, !"SV_Target", i8 4, i8 16, !6, i8 0, i32 1, i8 1, i32 0, i8 0, null}
+!9 = !{i32 0, i64 256}
+!10 = !{!"dxbc2dxil 1.2"}

+ 11 - 0
projects/dxilconv/test/dxbc2dxil/bool2.hlsl

@@ -0,0 +1,11 @@
+// RUN: %fxc /T ps_5_0 %s /Fo %t.dxbc
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+
+
+
+float main(float a : A, float b : B) : SV_Target  // float comparison feeding a conditional
+{
+    return a < b ? 0 : 1;  // bool2.ref: fcmp fast olt, then select between 0.0 and 1.0
+}

+ 45 - 0
projects/dxilconv/test/dxbc2dxil/bool2.ref

@@ -0,0 +1,45 @@
+
+define void @main() {
+entry:
+  %0 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %1 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 undef)
+  %2 = fcmp fast olt float %0, %1
+  %3 = select i1 %2, float 0.000000e+00, float 1.000000e+00
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %3)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.tempRegStore.i32(i32, i32, i32) #1
+
+; Function Attrs: nounwind readonly
+declare i32 @dx.op.tempRegLoad.i32(i32, i32) #2
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #1
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind readonly }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.entryPoints = !{!2}
+!llvm.ident = !{!11}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"ps", i32 6, i32 0}
+!2 = !{void ()* @main, !"main", !3, null, !10}
+!3 = !{!4, !8, null}
+!4 = !{!5, !7}
+!5 = !{i32 0, !"A", i8 9, i8 0, !6, i8 2, i32 1, i8 1, i32 0, i8 0, null}
+!6 = !{i32 0}
+!7 = !{i32 1, !"B", i8 9, i8 0, !6, i8 2, i32 1, i8 1, i32 0, i8 1, null}
+!8 = !{!9}
+!9 = !{i32 0, !"SV_Target", i8 9, i8 16, !6, i8 0, i32 1, i8 1, i32 0, i8 0, null}
+!10 = !{i32 0, i64 256}
+!11 = !{!"dxbc2dxil 1.2"}

+ 34 - 0
projects/dxilconv/test/dxbc2dxil/bufinfo.hlsl

@@ -0,0 +1,34 @@
+// RUN: %fxc /T ps_5_0 %s /Fo %t.dxbc
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+
+
+
+struct Foo  // element type of buf1/buf2; bufinfo.ref models it as dx.types.i8x52
+{
+  float2 a;   // 2 x 4 bytes
+  float3 b;   // 3 x 4 bytes
+  int2 c[4];  // 4 x 2 x 4 bytes, for 52 bytes in total
+};
+
+StructuredBuffer<Foo> buf1;    // read-only structured buffer of Foo
+RWStructuredBuffer<Foo> buf2;  // read-write structured buffer of Foo
+
+ByteAddressBuffer buf3;        // read-only byte-address buffer
+RWByteAddressBuffer buf4;      // read-write byte-address buffer
+
+Buffer<unorm float2> buf5;     // read-only typed buffer, unorm float elements
+RWBuffer<int3> buf6;           // read-write typed buffer, int elements
+
+uint main() : SV_Target  // sums the GetDimensions results of all six buffers into one uint
+{
+  uint r = 0, d1, d2;
+  buf1.GetDimensions(d1, d2); r += d1 + d2;  // structured buffer: two outputs; bufinfo.ref adds the constant 52 for d2
+  buf2.GetDimensions(d1, d2); r += d1 + d2;  // same pattern on the read-write structured buffer
+  buf3.GetDimensions(d1); r += d1;           // remaining buffers report a single value
+  buf4.GetDimensions(d1); r += d1;
+  buf5.GetDimensions(d1); r += d1;
+  buf6.GetDimensions(d1); r += d1;
+  return r;
+}

+ 89 - 0
projects/dxilconv/test/dxbc2dxil/bufinfo.ref

@@ -0,0 +1,89 @@
+
+%dx.types.Handle = type { i8* }
+%dx.types.Dimensions = type { i32, i32, i32, i32 }
+%dx.types.i8x52 = type { [52 x i8] }
+%dx.types.u32 = type { i32 }
+%dx.types.unorm_f32 = type { float }
+%dx.types.i32 = type { i32 }
+
+define void @main() {
+entry:
+  %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 0, i1 false)
+  %1 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 1, i32 1, i1 false)
+  %2 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 2, i32 2, i1 false)
+  %3 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 1, i1 false)
+  %4 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 2, i1 false)
+  %5 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 2, i32 3, i1 false)
+  %6 = call %dx.types.Dimensions @dx.op.getDimensions(i32 72, %dx.types.Handle %0, i32 undef)
+  %7 = extractvalue %dx.types.Dimensions %6, 0
+  %8 = add i32 %7, 52
+  %9 = call %dx.types.Dimensions @dx.op.getDimensions(i32 72, %dx.types.Handle %3, i32 undef)
+  %10 = extractvalue %dx.types.Dimensions %9, 0
+  %11 = add i32 %8, %10
+  %12 = add i32 %11, 52
+  %13 = call %dx.types.Dimensions @dx.op.getDimensions(i32 72, %dx.types.Handle %1, i32 undef)
+  %14 = extractvalue %dx.types.Dimensions %13, 0
+  %15 = add i32 %14, %12
+  %16 = call %dx.types.Dimensions @dx.op.getDimensions(i32 72, %dx.types.Handle %4, i32 undef)
+  %17 = extractvalue %dx.types.Dimensions %16, 0
+  %18 = add i32 %17, %15
+  %19 = call %dx.types.Dimensions @dx.op.getDimensions(i32 72, %dx.types.Handle %2, i32 undef)
+  %20 = extractvalue %dx.types.Dimensions %19, 0
+  %21 = add i32 %20, %18
+  %22 = call %dx.types.Dimensions @dx.op.getDimensions(i32 72, %dx.types.Handle %5, i32 undef)
+  %23 = extractvalue %dx.types.Dimensions %22, 0
+  %24 = add i32 %23, %21
+  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 0, i32 %24)
+  ret void
+}
+
+; Function Attrs: nounwind readonly
+declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #0
+
+; Function Attrs: nounwind readonly
+declare %dx.types.Dimensions @dx.op.getDimensions(i32, %dx.types.Handle, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.tempRegStore.i32(i32, i32, i32) #1
+
+; Function Attrs: nounwind readonly
+declare i32 @dx.op.tempRegLoad.i32(i32, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.i32(i32, i32, i32, i8, i32) #1
+
+attributes #0 = { nounwind readonly }
+attributes #1 = { nounwind }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.resources = !{!2}
+!dx.typeAnnotations = !{!14}
+!dx.entryPoints = !{!17}
+!llvm.ident = !{!23}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"ps", i32 6, i32 0}
+!2 = !{!3, !9, null, null}
+!3 = !{!4, !6, !7}
+!4 = !{i32 0, %dx.types.i8x52 addrspace(1)* undef, !"T0", i32 0, i32 0, i32 1, i32 12, i32 0, !5}
+!5 = !{i32 1, i32 52}
+!6 = !{i32 1, %dx.types.u32 addrspace(1)* undef, !"T1", i32 0, i32 1, i32 1, i32 11, i32 0, null}
+!7 = !{i32 2, %dx.types.unorm_f32 addrspace(1)* undef, !"T2", i32 0, i32 2, i32 1, i32 10, i32 0, !8}
+!8 = !{i32 0, i32 14}
+!9 = !{!10, !11, !12}
+!10 = !{i32 0, %dx.types.i8x52 addrspace(1)* undef, !"U0", i32 0, i32 1, i32 1, i32 12, i1 false, i1 false, i1 false, !5}
+!11 = !{i32 1, %dx.types.u32 addrspace(1)* undef, !"U1", i32 0, i32 2, i32 1, i32 11, i1 false, i1 false, i1 false, null}
+!12 = !{i32 2, %dx.types.i32 addrspace(1)* undef, !"U2", i32 0, i32 3, i32 1, i32 10, i1 false, i1 false, i1 false, !13}
+!13 = !{i32 0, i32 4}
+!14 = !{i32 0, %dx.types.unorm_f32 undef, !15}
+!15 = !{i32 0, !16}
+!16 = !{i32 7, i32 14}
+!17 = !{void ()* @main, !"main", !18, !2, !22}
+!18 = !{null, !19, null}
+!19 = !{!20}
+!20 = !{i32 0, !"SV_Target", i8 5, i8 16, !21, i8 0, i32 1, i8 1, i32 0, i8 0, null}
+!21 = !{i32 0}
+!22 = !{i32 0, i64 256}
+!23 = !{!"dxbc2dxil 1.2"}

+ 22 - 0
projects/dxilconv/test/dxbc2dxil/calc_lod.hlsl

@@ -0,0 +1,22 @@
+// RUN: %fxc /T ps_5_0 %s /Fo %t.dxbc
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+
+
+
+SamplerState samp1;             // single sampler shared by all three CalculateLevelOfDetail calls
+
+Texture2D<float4> tex1;         // queried with 2 coordinates
+Texture2DArray<float4> tex2;    // queried with 2 coordinates
+TextureCubeArray<float4> tex3;  // queried with 3 coordinates
+
+float4 main(float4 a : A) : SV_Target  // accumulates CalculateLevelOfDetail over three texture types
+{
+  float4 r = 0;
+  r += tex1.CalculateLevelOfDetail(samp1, a.xy);  // sampler, coordinates
+  r += tex2.CalculateLevelOfDetail(samp1, a.xy);   // Texture2DArray: still only a.xy
+  r += tex3.CalculateLevelOfDetail(samp1, a.xyz);  // TextureCubeArray: a.xyz
+
+  return r;  // each call's scalar result is added to all four components
+}

+ 81 - 0
projects/dxilconv/test/dxbc2dxil/calc_lod.ref

@@ -0,0 +1,81 @@
+
+%dx.types.Handle = type { i8* }
+%dx.types.f32 = type { float }
+%dx.types.Sampler = type opaque
+
+define void @main() {
+entry:
+  %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 0, i1 false)
+  %1 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 1, i32 1, i1 false)
+  %2 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 2, i32 2, i1 false)
+  %3 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 3, i32 0, i32 0, i1 false)
+  %4 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %5 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)
+  %6 = call float @dx.op.calculateLOD.f32(i32 81, %dx.types.Handle %0, %dx.types.Handle %3, float %4, float %5, float undef, i1 true)
+  %7 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %8 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)
+  %9 = call float @dx.op.calculateLOD.f32(i32 81, %dx.types.Handle %1, %dx.types.Handle %3, float %7, float %8, float undef, i1 true)
+  %10 = fadd fast float %9, %6
+  %11 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %12 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)
+  %13 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %14 = call float @dx.op.calculateLOD.f32(i32 81, %dx.types.Handle %2, %dx.types.Handle %3, float %11, float %12, float %13, i1 true)
+  %15 = fadd fast float %14, %10
+  %16 = fadd fast float %14, %10
+  %17 = fadd fast float %14, %10
+  %18 = fadd fast float %14, %10
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %15)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %16)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %17)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %18)
+  ret void
+}
+
+; Function Attrs: nounwind readonly
+declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #0
+
+; Function Attrs: nounwind readnone
+declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
+
+; Function Attrs: nounwind readonly
+declare float @dx.op.calculateLOD.f32(i32, %dx.types.Handle, %dx.types.Handle, float, float, float, i1) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.tempRegStore.f32(i32, i32, float) #2
+
+; Function Attrs: nounwind readonly
+declare float @dx.op.tempRegLoad.f32(i32, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #2
+
+attributes #0 = { nounwind readonly }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.resources = !{!2}
+!dx.entryPoints = !{!10}
+!llvm.ident = !{!18}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"ps", i32 6, i32 0}
+!2 = !{!3, null, null, !8}
+!3 = !{!4, !6, !7}
+!4 = !{i32 0, %dx.types.f32 addrspace(1)* undef, !"T0", i32 0, i32 0, i32 1, i32 2, i32 0, !5}
+!5 = !{i32 0, i32 9}
+!6 = !{i32 1, %dx.types.f32 addrspace(1)* undef, !"T1", i32 0, i32 1, i32 1, i32 7, i32 0, !5}
+!7 = !{i32 2, %dx.types.f32 addrspace(1)* undef, !"T2", i32 0, i32 2, i32 1, i32 9, i32 0, !5}
+!8 = !{!9}
+!9 = !{i32 0, %dx.types.Sampler addrspace(1)* undef, !"S0", i32 0, i32 0, i32 1, i32 0, null}
+!10 = !{void ()* @main, !"main", !11, !2, !17}
+!11 = !{!12, !15, null}
+!12 = !{!13}
+!13 = !{i32 0, !"A", i8 9, i8 0, !14, i8 2, i32 1, i8 4, i32 0, i8 0, null}
+!14 = !{i32 0}
+!15 = !{!16}
+!16 = !{i32 0, !"SV_Target", i8 9, i8 16, !14, i8 0, i32 1, i8 4, i32 0, i8 0, null}
+!17 = !{i32 0, i64 256}
+!18 = !{!"dxbc2dxil 1.2"}

+ 25 - 0
projects/dxilconv/test/dxbc2dxil/call1.hlsl

@@ -0,0 +1,25 @@
+// RUN: %fxc /T ps_5_0 %s /Fo %t.dxbc
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+
+
+
+float main(float2 a : A, int3 b : B) : SV_Target  // switch carrying the [call] attribute
+{
+  float r;
+  [call]  // call1.ref shows each case body emitted as its own @dx.label.N function
+  switch(b.x)
+  {
+  case 1:
+    r = 5.f;  // @dx.label.0 in call1.ref
+    break;
+  case 2:
+    r = a.x;  // @dx.label.1 in call1.ref
+    break;
+  default:
+    r = 3.f;  // @dx.label.2 in call1.ref
+    break;
+  }
+  return r;
+}

+ 93 - 0
projects/dxilconv/test/dxbc2dxil/call1.ref

@@ -0,0 +1,93 @@
+
+@dx.v32.r0 = internal global float undef, align 4
+
+define internal void @dx.label.0() {
+entry:
+  %0 = call float @dx.op.bitcastI32toF32(i32 126, i32 1084227584)
+  store float %0, float* @dx.v32.r0
+  ret void
+}
+
+define internal void @dx.label.1() {
+entry:
+  %0 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  store float %0, float* @dx.v32.r0
+  ret void
+}
+
+define internal void @dx.label.2() {
+entry:
+  %0 = call float @dx.op.bitcastI32toF32(i32 126, i32 1077936128)
+  store float %0, float* @dx.v32.r0
+  ret void
+}
+
+define void @main() {
+entry:
+  %0 = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 0, i32 undef)
+  switch i32 %0, label %switch0.default [
+    i32 1, label %switch0.casegroup0
+    i32 2, label %switch0.casegroup1
+  ]
+
+switch0.casegroup0:                               ; preds = %entry
+  call void @dx.label.0()
+  br label %switch0.end
+
+switch0.casegroup1:                               ; preds = %entry
+  call void @dx.label.1()
+  br label %switch0.end
+
+switch0.default:                                  ; preds = %entry
+  call void @dx.label.2()
+  br label %switch0.end
+
+switch0.end:                                      ; preds = %switch0.default, %switch0.casegroup1, %switch0.casegroup0
+  %1 = load float, float* @dx.v32.r0
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %1)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #0
+
+; Function Attrs: nounwind readonly
+declare float @dx.op.tempRegLoad.f32(i32, i32) #1
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #2
+
+; Function Attrs: nounwind
+declare void @dx.op.tempRegStore.i32(i32, i32, i32) #2
+
+; Function Attrs: nounwind readnone
+declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.tempRegStore.f32(i32, i32, float) #2
+
+; Function Attrs: nounwind readnone
+declare float @dx.op.bitcastI32toF32(i32, i32) #0
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.entryPoints = !{!2}
+!llvm.ident = !{!11}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"ps", i32 6, i32 0}
+!2 = !{void ()* @main, !"main", !3, null, !10}
+!3 = !{!4, !8, null}
+!4 = !{!5, !7}
+!5 = !{i32 0, !"A", i8 9, i8 0, !6, i8 2, i32 1, i8 2, i32 0, i8 0, null}
+!6 = !{i32 0}
+!7 = !{i32 1, !"B", i8 4, i8 0, !6, i8 1, i32 1, i8 3, i32 1, i8 0, null}
+!8 = !{!9}
+!9 = !{i32 0, !"SV_Target", i8 9, i8 16, !6, i8 0, i32 1, i8 1, i32 0, i8 0, null}
+!10 = !{i32 0, i64 256}
+!11 = !{!"dxbc2dxil 1.2"}

+ 32 - 0
projects/dxilconv/test/dxbc2dxil/call3.hlsl

@@ -0,0 +1,32 @@
+// RUN: %fxc /T ps_5_0 %s /Fo %t.dxbc
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+
+
+
+float main(float2 a : A, int3 b : B) : SV_Target  // [call] switch combined with early returns
+{
+  float r;
+  [branch]
+  if (b.y)
+    return a.y;  // early return before the switch, directly in main
+
+  [call]  // call3.ref shows each case body emitted as its own @dx.label.N function
+  switch(b.x)
+  {
+  case 1:
+    [branch]
+    if (b.y)
+      return a.y;  // return from inside a case body; call3.ref stores -1 to a flag global here and tests it after the switch
+    r = 5.f;
+    break;
+  case 2:
+    r = a.x;
+    break;
+  default:
+    r = 3.f;
+    break;
+  }
+  return r;
+}

+ 132 - 0
projects/dxilconv/test/dxbc2dxil/call3.ref

@@ -0,0 +1,132 @@
+
+@dx.v32.r0 = internal global float undef, align 4
+@dx.v32.r2 = internal global i32 undef, align 4
+@dx.v32.r1 = internal global float undef, align 4
+
+define internal void @dx.label.0() {
+entry:
+  %0 = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 1, i32 undef)
+  %1 = icmp ne i32 %0, 0
+  br i1 %1, label %if1.then, label %if1.end
+
+if1.then:                                         ; preds = %entry
+  %2 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)
+  store float %2, float* @dx.v32.r0
+  store i32 -1, i32* @dx.v32.r2
+  ret void
+
+if1.end:                                          ; preds = %entry
+  %3 = call float @dx.op.bitcastI32toF32(i32 126, i32 1084227584)
+  store float %3, float* @dx.v32.r1
+  store i32 0, i32* @dx.v32.r2
+  ret void
+}
+
+define internal void @dx.label.1() {
+entry:
+  %0 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  store float %0, float* @dx.v32.r1
+  store i32 0, i32* @dx.v32.r2
+  ret void
+}
+
+define internal void @dx.label.2() {
+entry:
+  %0 = call float @dx.op.bitcastI32toF32(i32 126, i32 1077936128)
+  store float %0, float* @dx.v32.r1
+  store i32 0, i32* @dx.v32.r2
+  ret void
+}
+
+define void @main() {
+entry:
+  %0 = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 1, i32 undef)
+  %1 = icmp ne i32 %0, 0
+  br i1 %1, label %if0.then, label %if0.end
+
+if0.then:                                         ; preds = %entry
+  %2 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %2)
+  ret void
+
+if0.end:                                          ; preds = %entry
+  %3 = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 0, i32 undef)
+  switch i32 %3, label %switch0.default [
+    i32 1, label %switch0.casegroup0
+    i32 2, label %switch0.casegroup1
+  ]
+
+switch0.casegroup0:                               ; preds = %if0.end
+  call void @dx.label.0()
+  br label %switch0.end
+
+switch0.casegroup1:                               ; preds = %if0.end
+  call void @dx.label.1()
+  br label %switch0.end
+
+switch0.default:                                  ; preds = %if0.end
+  call void @dx.label.2()
+  br label %switch0.end
+
+switch0.end:                                      ; preds = %switch0.default, %switch0.casegroup1, %switch0.casegroup0
+  %4 = load float, float* @dx.v32.r0
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %4)
+  %5 = load i32, i32* @dx.v32.r2
+  %6 = icmp ne i32 %5, 0
+  br i1 %6, label %label0.callc0.retc0, label %label0.callc0.afterretc0
+
+label0.callc0.retc0:                              ; preds = %switch0.end
+  ret void
+
+label0.callc0.afterretc0:                         ; preds = %switch0.end
+  %7 = load float, float* @dx.v32.r1
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %7)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #0
+
+; Function Attrs: nounwind readnone
+declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #1
+
+; Function Attrs: nounwind readonly
+declare float @dx.op.tempRegLoad.f32(i32, i32) #2
+
+; Function Attrs: nounwind readonly
+declare i32 @dx.op.tempRegLoad.i32(i32, i32) #2
+
+; Function Attrs: nounwind
+declare void @dx.op.tempRegStore.f32(i32, i32, float) #1
+
+; Function Attrs: nounwind
+declare void @dx.op.tempRegStore.i32(i32, i32, i32) #1
+
+; Function Attrs: nounwind readnone
+declare float @dx.op.bitcastI32toF32(i32, i32) #0
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind readonly }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.entryPoints = !{!2}
+!llvm.ident = !{!11}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"ps", i32 6, i32 0}
+!2 = !{void ()* @main, !"main", !3, null, !10}
+!3 = !{!4, !8, null}
+!4 = !{!5, !7}
+!5 = !{i32 0, !"A", i8 9, i8 0, !6, i8 2, i32 1, i8 2, i32 0, i8 0, null}
+!6 = !{i32 0}
+!7 = !{i32 1, !"B", i8 4, i8 0, !6, i8 1, i32 1, i8 3, i32 1, i8 0, null}
+!8 = !{!9}
+!9 = !{i32 0, !"SV_Target", i8 9, i8 16, !6, i8 0, i32 1, i8 1, i32 0, i8 0, null}
+!10 = !{i32 0, i64 256}
+!11 = !{!"dxbc2dxil 1.2"}

+ 11 - 0
projects/dxilconv/test/dxbc2dxil/cast1.hlsl

@@ -0,0 +1,11 @@
+// RUN: %fxc /T ps_5_0 %s /Fo %t.dxbc
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+
+
+
+float main(int a : A) : SV_Target  // implicit int -> float conversion
+{
+  return a;  // cast1.ref: sitofp i32 to float
+}

+ 35 - 0
projects/dxilconv/test/dxbc2dxil/cast1.ref

@@ -0,0 +1,35 @@
+
+define void @main() {
+entry:
+  %0 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %1 = sitofp i32 %0 to float
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %1)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #1
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.entryPoints = !{!2}
+!llvm.ident = !{!10}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"ps", i32 6, i32 0}
+!2 = !{void ()* @main, !"main", !3, null, !9}
+!3 = !{!4, !7, null}
+!4 = !{!5}
+!5 = !{i32 0, !"A", i8 4, i8 0, !6, i8 1, i32 1, i8 1, i32 0, i8 0, null}
+!6 = !{i32 0}
+!7 = !{!8}
+!8 = !{i32 0, !"SV_Target", i8 9, i8 16, !6, i8 0, i32 1, i8 1, i32 0, i8 0, null}
+!9 = !{i32 0, i64 256}
+!10 = !{!"dxbc2dxil 1.2"}

+ 11 - 0
projects/dxilconv/test/dxbc2dxil/cast2.hlsl

@@ -0,0 +1,11 @@
+// RUN: %fxc /T ps_5_0 %s /Fo %t.dxbc
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+
+
+
+float main(uint a : A) : SV_Target  // implicit uint -> float conversion
+{
+  return a;  // cast2.ref: uitofp i32 to float
+}

+ 35 - 0
projects/dxilconv/test/dxbc2dxil/cast2.ref

@@ -0,0 +1,35 @@
+
+define void @main() {
+entry:
+  %0 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %1 = uitofp i32 %0 to float
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %1)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #1
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.entryPoints = !{!2}
+!llvm.ident = !{!10}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"ps", i32 6, i32 0}
+!2 = !{void ()* @main, !"main", !3, null, !9}
+!3 = !{!4, !7, null}
+!4 = !{!5}
+!5 = !{i32 0, !"A", i8 5, i8 0, !6, i8 1, i32 1, i8 1, i32 0, i8 0, null}
+!6 = !{i32 0}
+!7 = !{!8}
+!8 = !{i32 0, !"SV_Target", i8 9, i8 16, !6, i8 0, i32 1, i8 1, i32 0, i8 0, null}
+!9 = !{i32 0, i64 256}
+!10 = !{!"dxbc2dxil 1.2"}

+ 11 - 0
projects/dxilconv/test/dxbc2dxil/cast3.hlsl

@@ -0,0 +1,11 @@
+// RUN: %fxc /T ps_5_0 %s /Fo %t.dxbc
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+
+
+
+int main(float a : A) : SV_Target  // implicit float -> int conversion
+{
+  return a;  // cast3.ref: fptosi float to i32
+}

+ 35 - 0
projects/dxilconv/test/dxbc2dxil/cast3.ref

@@ -0,0 +1,35 @@
+
+define void @main() {
+entry:
+  %0 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %1 = fptosi float %0 to i32
+  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 0, i32 %1)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.i32(i32, i32, i32, i8, i32) #1
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.entryPoints = !{!2}
+!llvm.ident = !{!10}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"ps", i32 6, i32 0}
+!2 = !{void ()* @main, !"main", !3, null, !9}
+!3 = !{!4, !7, null}
+!4 = !{!5}
+!5 = !{i32 0, !"A", i8 9, i8 0, !6, i8 2, i32 1, i8 1, i32 0, i8 0, null}
+!6 = !{i32 0}
+!7 = !{!8}
+!8 = !{i32 0, !"SV_Target", i8 4, i8 16, !6, i8 0, i32 1, i8 1, i32 0, i8 0, null}
+!9 = !{i32 0, i64 256}
+!10 = !{!"dxbc2dxil 1.2"}

+ 11 - 0
projects/dxilconv/test/dxbc2dxil/cast4.hlsl

@@ -0,0 +1,11 @@
+// RUN: %fxc /T ps_5_0 %s /Fo %t.dxbc
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+
+
+
+uint main(float a : A) : SV_Target  // implicit float -> uint conversion
+{
+  return a;  // cast4.ref: fptoui float to i32
+}

+ 35 - 0
projects/dxilconv/test/dxbc2dxil/cast4.ref

@@ -0,0 +1,35 @@
+
+define void @main() {
+entry:
+  %0 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %1 = fptoui float %0 to i32
+  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 0, i32 %1)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.i32(i32, i32, i32, i8, i32) #1
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.entryPoints = !{!2}
+!llvm.ident = !{!10}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"ps", i32 6, i32 0}
+!2 = !{void ()* @main, !"main", !3, null, !9}
+!3 = !{!4, !7, null}
+!4 = !{!5}
+!5 = !{i32 0, !"A", i8 9, i8 0, !6, i8 2, i32 1, i8 1, i32 0, i8 0, null}
+!6 = !{i32 0}
+!7 = !{!8}
+!8 = !{i32 0, !"SV_Target", i8 5, i8 16, !6, i8 0, i32 1, i8 1, i32 0, i8 0, null}
+!9 = !{i32 0, i64 256}
+!10 = !{!"dxbc2dxil 1.2"}

+ 11 - 0
projects/dxilconv/test/dxbc2dxil/cast5.hlsl

@@ -0,0 +1,11 @@
+// RUN: %fxc /T ps_5_0 %s /Fo %t.dxbc
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+
+
+
+float main(min16float a : A) : SV_Target  // implicit min16float -> float conversion
+{
+  return a;  // cast5.ref: fpext half to float
+}

+ 35 - 0
projects/dxilconv/test/dxbc2dxil/cast5.ref

@@ -0,0 +1,35 @@
+
+define void @main() {
+entry:
+  %0 = call half @dx.op.loadInput.f16(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %1 = fpext half %0 to float
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %1)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare half @dx.op.loadInput.f16(i32, i32, i32, i8, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #1
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.entryPoints = !{!2}
+!llvm.ident = !{!10}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"ps", i32 6, i32 0}
+!2 = !{void ()* @main, !"main", !3, null, !9}
+!3 = !{!4, !7, null}
+!4 = !{!5}
+!5 = !{i32 0, !"A", i8 8, i8 0, !6, i8 2, i32 1, i8 1, i32 0, i8 0, null}
+!6 = !{i32 0}
+!7 = !{!8}
+!8 = !{i32 0, !"SV_Target", i8 9, i8 16, !6, i8 0, i32 1, i8 1, i32 0, i8 0, null}
+!9 = !{i32 0, i64 288}
+!10 = !{!"dxbc2dxil 1.2"}

+ 11 - 0
projects/dxilconv/test/dxbc2dxil/cast6.hlsl

@@ -0,0 +1,11 @@
+// RUN: %fxc /T ps_5_0 %s /Fo %t.dxbc
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+
+
+
+min16float main(float a : A) : SV_Target
+{
+  return a;
+}

+ 35 - 0
projects/dxilconv/test/dxbc2dxil/cast6.ref

@@ -0,0 +1,35 @@
+
+define void @main() {
+entry:
+  %0 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %1 = fptrunc float %0 to half
+  call void @dx.op.storeOutput.f16(i32 5, i32 0, i32 0, i8 0, half %1)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.f16(i32, i32, i32, i8, half) #1
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.entryPoints = !{!2}
+!llvm.ident = !{!10}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"ps", i32 6, i32 0}
+!2 = !{void ()* @main, !"main", !3, null, !9}
+!3 = !{!4, !7, null}
+!4 = !{!5}
+!5 = !{i32 0, !"A", i8 9, i8 0, !6, i8 2, i32 1, i8 1, i32 0, i8 0, null}
+!6 = !{i32 0}
+!7 = !{!8}
+!8 = !{i32 0, !"SV_Target", i8 8, i8 16, !6, i8 0, i32 1, i8 1, i32 0, i8 0, null}
+!9 = !{i32 0, i64 288}
+!10 = !{!"dxbc2dxil 1.2"}

+ 20 - 0
projects/dxilconv/test/dxbc2dxil/cbuffer1.50.hlsl

@@ -0,0 +1,20 @@
+// RUN: %fxc /T ps_5_0 %s /Fo %t.dxbc
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+
+
+
+cbuffer Foo1 : register(b5)
+{
+  float4 g1;
+}
+cbuffer Foo2 : register(b5)
+{
+  float4 g2;
+}
+
+float4 main() : SV_TARGET
+{
+  return g2;
+}

+ 51 - 0
projects/dxilconv/test/dxbc2dxil/cbuffer1.50.ref

@@ -0,0 +1,51 @@
+
+%dx.types.Handle = type { i8* }
+%dx.types.CBufRet.f32 = type { float, float, float, float }
+%dx.types.i8x16 = type { [16 x i8] }
+
+define void @main() {
+entry:
+  %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 5, i1 false)
+  %1 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %0, i32 0)
+  %2 = extractvalue %dx.types.CBufRet.f32 %1, 0
+  %3 = extractvalue %dx.types.CBufRet.f32 %1, 1
+  %4 = extractvalue %dx.types.CBufRet.f32 %1, 2
+  %5 = extractvalue %dx.types.CBufRet.f32 %1, 3
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %2)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %3)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %4)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %5)
+  ret void
+}
+
+; Function Attrs: nounwind readonly
+declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #0
+
+; Function Attrs: nounwind readonly
+declare %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32, %dx.types.Handle, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #1
+
+attributes #0 = { nounwind readonly }
+attributes #1 = { nounwind }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.resources = !{!2}
+!dx.entryPoints = !{!5}
+!llvm.ident = !{!11}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"ps", i32 6, i32 0}
+!2 = !{null, null, !3, null}
+!3 = !{!4}
+!4 = !{i32 0, %dx.types.i8x16 addrspace(2)* undef, !"CB0", i32 0, i32 5, i32 1, i32 16, null}
+!5 = !{void ()* @main, !"main", !6, !2, !10}
+!6 = !{null, !7, null}
+!7 = !{!8}
+!8 = !{i32 0, !"SV_Target", i8 9, i8 16, !9, i8 0, i32 1, i8 4, i32 0, i8 0, null}
+!9 = !{i32 0}
+!10 = !{i32 0, i64 256}
+!11 = !{!"dxbc2dxil 1.2"}

+ 21 - 0
projects/dxilconv/test/dxbc2dxil/cbuffer1.51.hlsl

@@ -0,0 +1,21 @@
+// RUN: %fxc /T ps_5_1 %s /Fo %t.dxbc
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+
+
+
+cbuffer Foo1 : register(b5)
+{
+  float4 g1;
+}
+cbuffer Foo2 : register(b5)
+{
+  float4 g2;
+}
+
+[RootSignature("DescriptorTable(CBV(b5, numDescriptors=1), visibility=SHADER_VISIBILITY_ALL)")]
+float4 main() : SV_TARGET
+{
+  return g2;
+}

+ 50 - 0
projects/dxilconv/test/dxbc2dxil/cbuffer1.51.ref

@@ -0,0 +1,50 @@
+
+%dx.types.Handle = type { i8* }
+%dx.types.CBufRet.f32 = type { float, float, float, float }
+%dx.types.i8x16 = type { [16 x i8] }
+
+define void @main() {
+entry:
+  %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 5, i1 false)
+  %1 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %0, i32 0)
+  %2 = extractvalue %dx.types.CBufRet.f32 %1, 0
+  %3 = extractvalue %dx.types.CBufRet.f32 %1, 1
+  %4 = extractvalue %dx.types.CBufRet.f32 %1, 2
+  %5 = extractvalue %dx.types.CBufRet.f32 %1, 3
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %2)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %3)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %4)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %5)
+  ret void
+}
+
+; Function Attrs: nounwind readonly
+declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #0
+
+; Function Attrs: nounwind readonly
+declare %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32, %dx.types.Handle, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #1
+
+attributes #0 = { nounwind readonly }
+attributes #1 = { nounwind }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.resources = !{!2}
+!dx.entryPoints = !{!5}
+!llvm.ident = !{!10}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"ps", i32 6, i32 0}
+!2 = !{null, null, !3, null}
+!3 = !{!4}
+!4 = !{i32 0, %dx.types.i8x16 addrspace(2)* undef, !"CB0", i32 0, i32 5, i32 1, i32 16, null}
+!5 = !{void ()* @main, !"main", !6, !2, null}
+!6 = !{null, !7, null}
+!7 = !{!8}
+!8 = !{i32 0, !"SV_Target", i8 9, i8 16, !9, i8 0, i32 1, i8 4, i32 0, i8 0, null}
+!9 = !{i32 0}
+!10 = !{!"dxbc2dxil 1.2"}

+ 13 - 0
projects/dxilconv/test/dxbc2dxil/cbuffer2.50.hlsl

@@ -0,0 +1,13 @@
+// RUN: %fxc /T ps_5_0 %s /Fo %t.dxbc
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+
+
+
+float4 g1;
+
+float4 main() : SV_TARGET
+{
+  return g1.wyyy;
+}

+ 49 - 0
projects/dxilconv/test/dxbc2dxil/cbuffer2.50.ref

@@ -0,0 +1,49 @@
+
+%dx.types.Handle = type { i8* }
+%dx.types.CBufRet.f32 = type { float, float, float, float }
+%dx.types.i8x16 = type { [16 x i8] }
+
+define void @main() {
+entry:
+  %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 0, i1 false)
+  %1 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %0, i32 0)
+  %2 = extractvalue %dx.types.CBufRet.f32 %1, 3
+  %3 = extractvalue %dx.types.CBufRet.f32 %1, 1
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %2)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %3)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %3)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %3)
+  ret void
+}
+
+; Function Attrs: nounwind readonly
+declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #0
+
+; Function Attrs: nounwind readonly
+declare %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32, %dx.types.Handle, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #1
+
+attributes #0 = { nounwind readonly }
+attributes #1 = { nounwind }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.resources = !{!2}
+!dx.entryPoints = !{!5}
+!llvm.ident = !{!11}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"ps", i32 6, i32 0}
+!2 = !{null, null, !3, null}
+!3 = !{!4}
+!4 = !{i32 0, %dx.types.i8x16 addrspace(2)* undef, !"CB0", i32 0, i32 0, i32 1, i32 16, null}
+!5 = !{void ()* @main, !"main", !6, !2, !10}
+!6 = !{null, !7, null}
+!7 = !{!8}
+!8 = !{i32 0, !"SV_Target", i8 9, i8 16, !9, i8 0, i32 1, i8 4, i32 0, i8 0, null}
+!9 = !{i32 0}
+!10 = !{i32 0, i64 256}
+!11 = !{!"dxbc2dxil 1.2"}

+ 14 - 0
projects/dxilconv/test/dxbc2dxil/cbuffer2.51.hlsl

@@ -0,0 +1,14 @@
+// RUN: %fxc /T ps_5_1 %s /Fo %t.dxbc
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+
+
+
+float4 g1;
+
+[RootSignature("DescriptorTable(CBV(b0, numDescriptors=1), visibility=SHADER_VISIBILITY_ALL)")]
+float4 main() : SV_TARGET
+{
+  return g1.wyyy;
+}

+ 48 - 0
projects/dxilconv/test/dxbc2dxil/cbuffer2.51.ref

@@ -0,0 +1,48 @@
+
+%dx.types.Handle = type { i8* }
+%dx.types.CBufRet.f32 = type { float, float, float, float }
+%dx.types.i8x16 = type { [16 x i8] }
+
+define void @main() {
+entry:
+  %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 0, i1 false)
+  %1 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %0, i32 0)
+  %2 = extractvalue %dx.types.CBufRet.f32 %1, 3
+  %3 = extractvalue %dx.types.CBufRet.f32 %1, 1
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %2)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %3)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %3)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %3)
+  ret void
+}
+
+; Function Attrs: nounwind readonly
+declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #0
+
+; Function Attrs: nounwind readonly
+declare %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32, %dx.types.Handle, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #1
+
+attributes #0 = { nounwind readonly }
+attributes #1 = { nounwind }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.resources = !{!2}
+!dx.entryPoints = !{!5}
+!llvm.ident = !{!10}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"ps", i32 6, i32 0}
+!2 = !{null, null, !3, null}
+!3 = !{!4}
+!4 = !{i32 0, %dx.types.i8x16 addrspace(2)* undef, !"CB0", i32 0, i32 0, i32 1, i32 16, null}
+!5 = !{void ()* @main, !"main", !6, !2, null}
+!6 = !{null, !7, null}
+!7 = !{!8}
+!8 = !{i32 0, !"SV_Target", i8 9, i8 16, !9, i8 0, i32 1, i8 4, i32 0, i8 0, null}
+!9 = !{i32 0}
+!10 = !{!"dxbc2dxil 1.2"}

+ 21 - 0
projects/dxilconv/test/dxbc2dxil/cbuffer3.50.hlsl

@@ -0,0 +1,21 @@
+// RUN: %fxc /T ps_5_0 %s /Fo %t.dxbc
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+
+
+
+cbuffer Foo
+{
+  float4 g1[16];
+};
+
+cbuffer Bar
+{
+  uint3 idx[8];
+};
+
+float4 main(int2 a : A) : SV_TARGET
+{
+  return g1[idx[a.x].z].wyyy;
+}

+ 71 - 0
projects/dxilconv/test/dxbc2dxil/cbuffer3.50.ref

@@ -0,0 +1,71 @@
+
+%dx.types.Handle = type { i8* }
+%dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
+%dx.types.CBufRet.f32 = type { float, float, float, float }
+%dx.types.i8x256 = type { [256 x i8] }
+%dx.types.i8x128 = type { [128 x i8] }
+
+define void @main() {
+entry:
+  %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 0, i1 false)
+  %1 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 1, i32 1, i1 false)
+  %2 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %3 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %1, i32 %2)
+  %4 = extractvalue %dx.types.CBufRet.i32 %3, 2
+  %5 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %0, i32 %4)
+  %6 = extractvalue %dx.types.CBufRet.f32 %5, 3
+  %7 = extractvalue %dx.types.CBufRet.f32 %5, 1
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %6)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %7)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %7)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %7)
+  ret void
+}
+
+; Function Attrs: nounwind readonly
+declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #0
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #1
+
+; Function Attrs: nounwind
+declare void @dx.op.tempRegStore.i32(i32, i32, i32) #2
+
+; Function Attrs: nounwind readonly
+declare i32 @dx.op.tempRegLoad.i32(i32, i32) #0
+
+; Function Attrs: nounwind readonly
+declare %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32, %dx.types.Handle, i32) #0
+
+; Function Attrs: nounwind readonly
+declare %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32, %dx.types.Handle, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #2
+
+attributes #0 = { nounwind readonly }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.resources = !{!2}
+!dx.entryPoints = !{!6}
+!llvm.ident = !{!14}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"ps", i32 6, i32 0}
+!2 = !{null, null, !3, null}
+!3 = !{!4, !5}
+!4 = !{i32 0, %dx.types.i8x256 addrspace(2)* undef, !"CB0", i32 0, i32 0, i32 1, i32 256, null}
+!5 = !{i32 1, %dx.types.i8x128 addrspace(2)* undef, !"CB1", i32 0, i32 1, i32 1, i32 128, null}
+!6 = !{void ()* @main, !"main", !7, !2, !13}
+!7 = !{!8, !11, null}
+!8 = !{!9}
+!9 = !{i32 0, !"A", i8 4, i8 0, !10, i8 1, i32 1, i8 2, i32 0, i8 0, null}
+!10 = !{i32 0}
+!11 = !{!12}
+!12 = !{i32 0, !"SV_Target", i8 9, i8 16, !10, i8 0, i32 1, i8 4, i32 0, i8 0, null}
+!13 = !{i32 0, i64 256}
+!14 = !{!"dxbc2dxil 1.2"}

+ 26 - 0
projects/dxilconv/test/dxbc2dxil/cbuffer3.51.hlsl

@@ -0,0 +1,26 @@
+// RUN: %fxc /T ps_5_1 %s /Fo %t.dxbc
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+
+
+
+struct Foo
+{
+  float4 g1[16];
+};
+
+struct Bar
+{
+  uint3 idx[16];
+};
+
+ConstantBuffer<Foo> buf1[32] : register(b77, space3);
+ConstantBuffer<Bar> buf2[64] : register(b17);
+
+[RootSignature("DescriptorTable(CBV(b17, numDescriptors=64, space=0), visibility=SHADER_VISIBILITY_ALL),\
+                DescriptorTable(CBV(b77, numDescriptors=32, space=3), visibility=SHADER_VISIBILITY_ALL)")]
+float4 main(int3 a : A) : SV_TARGET
+{
+  return buf1[ buf2[a.x].idx[a.y].z ].g1[a.z + 12].wyyy;
+}

+ 74 - 0
projects/dxilconv/test/dxbc2dxil/cbuffer3.51.ref

@@ -0,0 +1,74 @@
+
+%dx.types.Handle = type { i8* }
+%dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
+%dx.types.CBufRet.f32 = type { float, float, float, float }
+%dx.types.i8x256 = type { [256 x i8] }
+
+define void @main() {
+entry:
+  %0 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 2, i32 undef)
+  %1 = add i32 %0, 12
+  %2 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 1, i32 undef)
+  %3 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %4 = add i32 %3, 17
+  %5 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 %4, i1 false)
+  %6 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %5, i32 %2)
+  %7 = extractvalue %dx.types.CBufRet.i32 %6, 2
+  %8 = add i32 %7, 77
+  %9 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 1, i32 %8, i1 false)
+  %10 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %9, i32 %1)
+  %11 = extractvalue %dx.types.CBufRet.f32 %10, 3
+  %12 = extractvalue %dx.types.CBufRet.f32 %10, 1
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %11)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %12)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %12)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %12)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.tempRegStore.i32(i32, i32, i32) #1
+
+; Function Attrs: nounwind readonly
+declare i32 @dx.op.tempRegLoad.i32(i32, i32) #2
+
+; Function Attrs: nounwind readonly
+declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #2
+
+; Function Attrs: nounwind readonly
+declare %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32, %dx.types.Handle, i32) #2
+
+; Function Attrs: nounwind readonly
+declare %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32, %dx.types.Handle, i32) #2
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #1
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind readonly }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.resources = !{!2}
+!dx.entryPoints = !{!6}
+!llvm.ident = !{!13}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"ps", i32 6, i32 0}
+!2 = !{null, null, !3, null}
+!3 = !{!4, !5}
+!4 = !{i32 0, %dx.types.i8x256 addrspace(2)* undef, !"CB0", i32 0, i32 17, i32 64, i32 256, null}
+!5 = !{i32 1, %dx.types.i8x256 addrspace(2)* undef, !"CB1", i32 3, i32 77, i32 32, i32 256, null}
+!6 = !{void ()* @main, !"main", !7, !2, null}
+!7 = !{!8, !11, null}
+!8 = !{!9}
+!9 = !{i32 0, !"A", i8 4, i8 0, !10, i8 1, i32 1, i8 3, i32 0, i8 0, null}
+!10 = !{i32 0}
+!11 = !{!12}
+!12 = !{i32 0, !"SV_Target", i8 9, i8 16, !10, i8 0, i32 1, i8 4, i32 0, i8 0, null}
+!13 = !{!"dxbc2dxil 1.2"}

+ 12 - 0
projects/dxilconv/test/dxbc2dxil/cmp1.hlsl

@@ -0,0 +1,12 @@
+// RUN: %fxc /T ps_5_0 %s /Fo %t.dxbc
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+
+
+
+bool main(float a : A, float b : B) : SV_TARGET
+{
+  return a == b;
+}
+

+ 38 - 0
projects/dxilconv/test/dxbc2dxil/cmp1.ref

@@ -0,0 +1,38 @@
+
+define void @main() {
+entry:
+  %0 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 undef)
+  %1 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+  %2 = fcmp fast oeq float %0, %1
+  %3 = sext i1 %2 to i32
+  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 0, i32 %3)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #0
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.i32(i32, i32, i32, i8, i32) #1
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.entryPoints = !{!2}
+!llvm.ident = !{!11}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"ps", i32 6, i32 0}
+!2 = !{void ()* @main, !"main", !3, null, !10}
+!3 = !{!4, !8, null}
+!4 = !{!5, !7}
+!5 = !{i32 0, !"A", i8 9, i8 0, !6, i8 2, i32 1, i8 1, i32 0, i8 0, null}
+!6 = !{i32 0}
+!7 = !{i32 1, !"B", i8 9, i8 0, !6, i8 2, i32 1, i8 1, i32 0, i8 1, null}
+!8 = !{!9}
+!9 = !{i32 0, !"SV_Target", i8 5, i8 16, !6, i8 0, i32 1, i8 1, i32 0, i8 0, null}
+!10 = !{i32 0, i64 256}
+!11 = !{!"dxbc2dxil 1.2"}

+ 11 - 0
projects/dxilconv/test/dxbc2dxil/constoperand1.hlsl

@@ -0,0 +1,11 @@
+// RUN: %fxc /T vs_5_0 %s /Fo %t.dxbc
+// RUN: %dxbc2dxil %t.dxbc /emit-llvm /o %t.ll.converted
+// RUN: fc %b.ref %t.ll.converted
+
+
+
+
+float4 main() : SV_POSITION
+{
+  return float4(3,0,0.5,0.12345);
+}

+ 30 - 0
projects/dxilconv/test/dxbc2dxil/constoperand1.ref

@@ -0,0 +1,30 @@
+
+define void @main() {
+entry:
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 3.000000e+00)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 0.000000e+00)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float 5.000000e-01)
+  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float 0x3FBF9A6B60000000)
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
+
+attributes #0 = { nounwind }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.entryPoints = !{!2}
+!llvm.ident = !{!8}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"vs", i32 6, i32 0}
+!2 = !{void ()* @main, !"main", !3, null, !7}
+!3 = !{null, !4, null}
+!4 = !{!5}
+!5 = !{i32 0, !"SV_Position", i8 9, i8 3, !6, i8 0, i32 1, i8 4, i32 0, i8 0, null}
+!6 = !{i32 0}
+!7 = !{i32 0, i64 256}
+!8 = !{!"dxbc2dxil 1.2"}

Some files were not shown because too many files changed in this diff