2
0
Branimir Karadžić 10 жил өмнө
parent
commit
b702dcdf3c

+ 3 - 0
src/amalgamated.cpp

@@ -17,4 +17,7 @@
 #include "renderer_null.cpp"
 #include "renderer_gl.cpp"
 #include "renderer_vk.cpp"
+#include "shader_dxbc.cpp"
+#include "shader_dx9bc.cpp"
+#include "shader_spirv.cpp"
 #include "vertexdecl.cpp"

+ 2 - 0
src/bgfx_p.h

@@ -140,11 +140,13 @@ namespace bgfx
 #	include <tinystl/string.h>
 #	include <tinystl/unordered_map.h>
 #	include <tinystl/unordered_set.h>
+#	include <tinystl/vector.h>
 namespace stl = tinystl;
 #else
 #	include <string>
 #	include <unordered_map>
 #	include <unordered_set>
+#	include <vector>
 namespace stl
 {
 	using namespace std;

+ 3990 - 2
src/renderer_d3d12.cpp

@@ -6,10 +6,3998 @@
 #include "bgfx_p.h"
 
 #if BGFX_CONFIG_RENDERER_DIRECT3D12
-#	include "../../bgfx-ext/src/renderer_d3d12.cpp"
+#	include "renderer_d3d12.h"
+
+#	if !USE_D3D12_DYNAMIC_LIB
+#		pragma comment(lib, "D3D12.lib")
+#	endif // !USE_D3D12_DYNAMIC_LIB
+
+namespace bgfx { namespace d3d12
+{
+	static wchar_t s_viewNameW[BGFX_CONFIG_MAX_VIEWS][256]; // One 256-element wide-character buffer per view; presumably holds view names for debug markers (use site is outside this chunk -- TODO confirm).
+
+	struct PrimInfo // Describes one primitive type: its D3D topology, its PSO topology type, and three index-count parameters.
+	{
+		D3D_PRIMITIVE_TOPOLOGY m_toplogy; // Input-assembler topology (name is misspelled but is part of the interface).
+		D3D12_PRIMITIVE_TOPOLOGY_TYPE m_topologyType; // Coarse topology class (triangle/line/point).
+		uint32_t m_min; // NOTE(review): presumably the minimum index count forming one primitive -- confirm at use site.
+		uint32_t m_div; // NOTE(review): presumably the divisor in numPrims = numIndices/m_div - m_sub -- confirm at use site.
+		uint32_t m_sub; // NOTE(review): presumably the subtrahend of the same formula -- confirm at use site.
+	};
+	
+	static const PrimInfo s_primInfo[] = // One row per primitive type, in the same order as s_primName; the last row is an UNDEFINED sentinel with all counts zero.
+	{
+		{ D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST,  D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE,  3, 3, 0 },
+		{ D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE,  3, 1, 2 },
+		{ D3D_PRIMITIVE_TOPOLOGY_LINELIST,      D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE,      2, 2, 0 },
+		{ D3D_PRIMITIVE_TOPOLOGY_POINTLIST,     D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT,     1, 1, 0 },
+		{ D3D_PRIMITIVE_TOPOLOGY_UNDEFINED,     D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED, 0, 0, 0 },
+	};
+
+	static const char* s_primName[] = // Display names for the first four s_primInfo rows (the UNDEFINED sentinel has no name).
+	{
+		"TriList",
+		"TriStrip",
+		"Line",
+		"Point",
+	};
+	BX_STATIC_ASSERT(BX_COUNTOF(s_primInfo) == BX_COUNTOF(s_primName)+1); // s_primInfo must have exactly one extra (sentinel) row.
+
+	static const uint32_t s_checkMsaa[] = // Sample counts 0/2/4/8/16, one per row of s_msaa below; NOTE(review): presumably the counts probed for MSAA support -- use site is outside this chunk.
+	{
+		0,
+		2,
+		4,
+		8,
+		16,
+	};
+
+	static DXGI_SAMPLE_DESC s_msaa[] = // { Count, Quality } per MSAA level (1x..16x, quality 0); deliberately non-const -- NOTE(review): presumably patched at runtime after capability checks, confirm.
+	{
+		{  1, 0 },
+		{  2, 0 },
+		{  4, 0 },
+		{  8, 0 },
+		{ 16, 0 },
+	};
+
+	static const D3D12_BLEND s_blendFactor[][2] = // Row order follows the trailing comments (row 0 is a placeholder). Second column replaces *_COLOR factors with their *_ALPHA equivalents -- presumably [0] is the RGB factor and [1] the alpha factor, TODO confirm at use site.
+	{
+		{ (D3D12_BLEND)0,               (D3D12_BLEND)0               }, // ignored
+		{ D3D12_BLEND_ZERO,             D3D12_BLEND_ZERO             }, // ZERO
+		{ D3D12_BLEND_ONE,              D3D12_BLEND_ONE              },	// ONE
+		{ D3D12_BLEND_SRC_COLOR,        D3D12_BLEND_SRC_ALPHA        },	// SRC_COLOR
+		{ D3D12_BLEND_INV_SRC_COLOR,    D3D12_BLEND_INV_SRC_ALPHA    },	// INV_SRC_COLOR
+		{ D3D12_BLEND_SRC_ALPHA,        D3D12_BLEND_SRC_ALPHA        },	// SRC_ALPHA
+		{ D3D12_BLEND_INV_SRC_ALPHA,    D3D12_BLEND_INV_SRC_ALPHA    },	// INV_SRC_ALPHA
+		{ D3D12_BLEND_DEST_ALPHA,       D3D12_BLEND_DEST_ALPHA       },	// DST_ALPHA
+		{ D3D12_BLEND_INV_DEST_ALPHA,   D3D12_BLEND_INV_DEST_ALPHA   },	// INV_DST_ALPHA
+		{ D3D12_BLEND_DEST_COLOR,       D3D12_BLEND_DEST_ALPHA       },	// DST_COLOR
+		{ D3D12_BLEND_INV_DEST_COLOR,   D3D12_BLEND_INV_DEST_ALPHA   },	// INV_DST_COLOR
+		{ D3D12_BLEND_SRC_ALPHA_SAT,    D3D12_BLEND_ONE              },	// SRC_ALPHA_SAT
+		{ D3D12_BLEND_BLEND_FACTOR,     D3D12_BLEND_BLEND_FACTOR     },	// FACTOR
+		{ D3D12_BLEND_INV_BLEND_FACTOR, D3D12_BLEND_INV_BLEND_FACTOR },	// INV_FACTOR
+	};
+
+	static const D3D12_BLEND_OP s_blendEquation[] = // Blend operations in lookup order ADD, SUB, REVSUB, MIN, MAX; the index source is outside this chunk (presumably decoded bgfx state bits -- confirm).
+	{
+		D3D12_BLEND_OP_ADD,
+		D3D12_BLEND_OP_SUBTRACT,
+		D3D12_BLEND_OP_REV_SUBTRACT,
+		D3D12_BLEND_OP_MIN,
+		D3D12_BLEND_OP_MAX,
+	};
+
+	static const D3D12_COMPARISON_FUNC s_cmpFunc[] = // Comparison functions by lookup index; index 0 is a placeholder. Index source is outside this chunk (presumably depth/stencil test state -- confirm).
+	{
+		D3D12_COMPARISON_FUNC(0), // ignored
+		D3D12_COMPARISON_FUNC_LESS,
+		D3D12_COMPARISON_FUNC_LESS_EQUAL,
+		D3D12_COMPARISON_FUNC_EQUAL,
+		D3D12_COMPARISON_FUNC_GREATER_EQUAL,
+		D3D12_COMPARISON_FUNC_GREATER,
+		D3D12_COMPARISON_FUNC_NOT_EQUAL,
+		D3D12_COMPARISON_FUNC_NEVER,
+		D3D12_COMPARISON_FUNC_ALWAYS,
+	};
+
+	static const D3D12_STENCIL_OP s_stencilOp[] = // Stencil operations by lookup index (ZERO first, KEEP second); index source is outside this chunk -- presumably decoded stencil state, confirm.
+	{
+		D3D12_STENCIL_OP_ZERO,
+		D3D12_STENCIL_OP_KEEP,
+		D3D12_STENCIL_OP_REPLACE,
+		D3D12_STENCIL_OP_INCR,
+		D3D12_STENCIL_OP_INCR_SAT,
+		D3D12_STENCIL_OP_DECR,
+		D3D12_STENCIL_OP_DECR_SAT,
+		D3D12_STENCIL_OP_INVERT,
+	};
+
+	static const D3D12_CULL_MODE s_cullMode[] = // Cull modes by lookup index: none, front, back; index source is outside this chunk.
+	{
+		D3D12_CULL_MODE_NONE,
+		D3D12_CULL_MODE_FRONT,
+		D3D12_CULL_MODE_BACK,
+	};
+
+	static const D3D12_TEXTURE_ADDRESS_MODE s_textureAddress[] = // Texture addressing modes by lookup index: wrap, mirror, clamp; index source is outside this chunk.
+	{
+		D3D12_TEXTURE_ADDRESS_MODE_WRAP,
+		D3D12_TEXTURE_ADDRESS_MODE_MIRROR,
+		D3D12_TEXTURE_ADDRESS_MODE_CLAMP,
+	};
+
+	/*
+	 * D3D11_FILTER_MIN_MAG_MIP_POINT               = 0x00,
+	 * D3D11_FILTER_MIN_MAG_POINT_MIP_LINEAR        = 0x01,
+	 * D3D11_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT  = 0x04,
+	 * D3D11_FILTER_MIN_POINT_MAG_MIP_LINEAR        = 0x05,
+	 * D3D11_FILTER_MIN_LINEAR_MAG_MIP_POINT        = 0x10,
+	 * D3D11_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR = 0x11,
+	 * D3D11_FILTER_MIN_MAG_LINEAR_MIP_POINT        = 0x14,
+	 * D3D11_FILTER_MIN_MAG_MIP_LINEAR              = 0x15,
+	 * D3D11_FILTER_ANISOTROPIC                     = 0x55,
+	 *
+	 * D3D11_COMPARISON_FILTERING_BIT               = 0x80,
+	 * D3D11_ANISOTROPIC_FILTERING_BIT              = 0x40,
+	 *
+	 * According to D3D11_FILTER enum bits for min, mag and mip are:
+	 * 0x10 // MIN_LINEAR
+	 * 0x04 // MAG_LINEAR
+	 * 0x01 // MIP_LINEAR
+	 */
+
+	static const uint8_t s_textureFilter[3][3] = // Filter bit patterns: outer index selects min/mag/mip, inner index selects linear/point/anisotropic (see the per-entry comments).
+	{
+		{
+			0x10, // min linear
+			0x00, // min point
+			0x55, // anisotropic
+		},
+		{
+			0x04, // mag linear
+			0x00, // mag point
+			0x55, // anisotropic
+		},
+		{
+			0x01, // mip linear
+			0x00, // mip point
+			0x55, // anisotropic
+		},
+	};
+
+	struct TextureFormatInfo // Three DXGI formats associated with one bgfx texture format.
+	{
+		DXGI_FORMAT m_fmt; // Resource format (typeless for the depth rows of s_textureFormat).
+		DXGI_FORMAT m_fmtSrv; // Format used when the texture is viewed as a shader resource (per the Srv suffix).
+		DXGI_FORMAT m_fmtDsv; // Depth-stencil view format; DXGI_FORMAT_UNKNOWN for non-depth rows.
+	};
+
+	static const TextureFormatInfo s_textureFormat[] = // Indexed by TextureFormat::Enum (row order must match; see the static assert). UNKNOWN rows are formats with no DXGI mapping here. Note D24 and D32 share the D24S8 formats, and D16F/D24F share the D32F formats.
+	{
+		{ DXGI_FORMAT_BC1_UNORM,          DXGI_FORMAT_BC1_UNORM,             DXGI_FORMAT_UNKNOWN           }, // BC1
+		{ DXGI_FORMAT_BC2_UNORM,          DXGI_FORMAT_BC2_UNORM,             DXGI_FORMAT_UNKNOWN           }, // BC2
+		{ DXGI_FORMAT_BC3_UNORM,          DXGI_FORMAT_BC3_UNORM,             DXGI_FORMAT_UNKNOWN           }, // BC3
+		{ DXGI_FORMAT_BC4_UNORM,          DXGI_FORMAT_BC4_UNORM,             DXGI_FORMAT_UNKNOWN           }, // BC4
+		{ DXGI_FORMAT_BC5_UNORM,          DXGI_FORMAT_BC5_UNORM,             DXGI_FORMAT_UNKNOWN           }, // BC5
+		{ DXGI_FORMAT_BC6H_SF16,          DXGI_FORMAT_BC6H_SF16,             DXGI_FORMAT_UNKNOWN           }, // BC6H
+		{ DXGI_FORMAT_BC7_UNORM,          DXGI_FORMAT_BC7_UNORM,             DXGI_FORMAT_UNKNOWN           }, // BC7
+		{ DXGI_FORMAT_UNKNOWN,            DXGI_FORMAT_UNKNOWN,               DXGI_FORMAT_UNKNOWN           }, // ETC1
+		{ DXGI_FORMAT_UNKNOWN,            DXGI_FORMAT_UNKNOWN,               DXGI_FORMAT_UNKNOWN           }, // ETC2
+		{ DXGI_FORMAT_UNKNOWN,            DXGI_FORMAT_UNKNOWN,               DXGI_FORMAT_UNKNOWN           }, // ETC2A
+		{ DXGI_FORMAT_UNKNOWN,            DXGI_FORMAT_UNKNOWN,               DXGI_FORMAT_UNKNOWN           }, // ETC2A1
+		{ DXGI_FORMAT_UNKNOWN,            DXGI_FORMAT_UNKNOWN,               DXGI_FORMAT_UNKNOWN           }, // PTC12
+		{ DXGI_FORMAT_UNKNOWN,            DXGI_FORMAT_UNKNOWN,               DXGI_FORMAT_UNKNOWN           }, // PTC14
+		{ DXGI_FORMAT_UNKNOWN,            DXGI_FORMAT_UNKNOWN,               DXGI_FORMAT_UNKNOWN           }, // PTC12A
+		{ DXGI_FORMAT_UNKNOWN,            DXGI_FORMAT_UNKNOWN,               DXGI_FORMAT_UNKNOWN           }, // PTC14A
+		{ DXGI_FORMAT_UNKNOWN,            DXGI_FORMAT_UNKNOWN,               DXGI_FORMAT_UNKNOWN           }, // PTC22
+		{ DXGI_FORMAT_UNKNOWN,            DXGI_FORMAT_UNKNOWN,               DXGI_FORMAT_UNKNOWN           }, // PTC24
+		{ DXGI_FORMAT_UNKNOWN,            DXGI_FORMAT_UNKNOWN,               DXGI_FORMAT_UNKNOWN           }, // Unknown
+		{ DXGI_FORMAT_R1_UNORM,           DXGI_FORMAT_R1_UNORM,              DXGI_FORMAT_UNKNOWN           }, // R1
+		{ DXGI_FORMAT_R8_UNORM,           DXGI_FORMAT_R8_UNORM,              DXGI_FORMAT_UNKNOWN           }, // R8
+		{ DXGI_FORMAT_R16_UNORM,          DXGI_FORMAT_R16_UNORM,             DXGI_FORMAT_UNKNOWN           }, // R16
+		{ DXGI_FORMAT_R16_FLOAT,          DXGI_FORMAT_R16_FLOAT,             DXGI_FORMAT_UNKNOWN           }, // R16F
+		{ DXGI_FORMAT_R32_UINT,           DXGI_FORMAT_R32_UINT,              DXGI_FORMAT_UNKNOWN           }, // R32
+		{ DXGI_FORMAT_R32_FLOAT,          DXGI_FORMAT_R32_FLOAT,             DXGI_FORMAT_UNKNOWN           }, // R32F
+		{ DXGI_FORMAT_R8G8_UNORM,         DXGI_FORMAT_R8G8_UNORM,            DXGI_FORMAT_UNKNOWN           }, // RG8
+		{ DXGI_FORMAT_R16G16_UNORM,       DXGI_FORMAT_R16G16_UNORM,          DXGI_FORMAT_UNKNOWN           }, // RG16
+		{ DXGI_FORMAT_R16G16_FLOAT,       DXGI_FORMAT_R16G16_FLOAT,          DXGI_FORMAT_UNKNOWN           }, // RG16F
+		{ DXGI_FORMAT_R32G32_UINT,        DXGI_FORMAT_R32G32_UINT,           DXGI_FORMAT_UNKNOWN           }, // RG32
+		{ DXGI_FORMAT_R32G32_FLOAT,       DXGI_FORMAT_R32G32_FLOAT,          DXGI_FORMAT_UNKNOWN           }, // RG32F
+		{ DXGI_FORMAT_B8G8R8A8_UNORM,     DXGI_FORMAT_B8G8R8A8_UNORM,        DXGI_FORMAT_UNKNOWN           }, // BGRA8
+		{ DXGI_FORMAT_R8G8B8A8_UNORM,     DXGI_FORMAT_R8G8B8A8_UNORM,        DXGI_FORMAT_UNKNOWN           }, // RGBA8
+		{ DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_R16G16B16A16_UNORM,    DXGI_FORMAT_UNKNOWN           }, // RGBA16
+		{ DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT,    DXGI_FORMAT_UNKNOWN           }, // RGBA16F
+		{ DXGI_FORMAT_R32G32B32A32_UINT,  DXGI_FORMAT_R32G32B32A32_UINT,     DXGI_FORMAT_UNKNOWN           }, // RGBA32
+		{ DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT,    DXGI_FORMAT_UNKNOWN           }, // RGBA32F
+		{ DXGI_FORMAT_B5G6R5_UNORM,       DXGI_FORMAT_B5G6R5_UNORM,          DXGI_FORMAT_UNKNOWN           }, // R5G6B5
+		{ DXGI_FORMAT_B4G4R4A4_UNORM,     DXGI_FORMAT_B4G4R4A4_UNORM,        DXGI_FORMAT_UNKNOWN           }, // RGBA4
+		{ DXGI_FORMAT_B5G5R5A1_UNORM,     DXGI_FORMAT_B5G5R5A1_UNORM,        DXGI_FORMAT_UNKNOWN           }, // RGB5A1
+		{ DXGI_FORMAT_R10G10B10A2_UNORM,  DXGI_FORMAT_R10G10B10A2_UNORM,     DXGI_FORMAT_UNKNOWN           }, // RGB10A2
+		{ DXGI_FORMAT_R11G11B10_FLOAT,    DXGI_FORMAT_R11G11B10_FLOAT,       DXGI_FORMAT_UNKNOWN           }, // R11G11B10F
+		{ DXGI_FORMAT_UNKNOWN,            DXGI_FORMAT_UNKNOWN,               DXGI_FORMAT_UNKNOWN           }, // UnknownDepth
+		{ DXGI_FORMAT_R16_TYPELESS,       DXGI_FORMAT_R16_UNORM,             DXGI_FORMAT_D16_UNORM         }, // D16
+		{ DXGI_FORMAT_R24G8_TYPELESS,     DXGI_FORMAT_R24_UNORM_X8_TYPELESS, DXGI_FORMAT_D24_UNORM_S8_UINT }, // D24
+		{ DXGI_FORMAT_R24G8_TYPELESS,     DXGI_FORMAT_R24_UNORM_X8_TYPELESS, DXGI_FORMAT_D24_UNORM_S8_UINT }, // D24S8
+		{ DXGI_FORMAT_R24G8_TYPELESS,     DXGI_FORMAT_R24_UNORM_X8_TYPELESS, DXGI_FORMAT_D24_UNORM_S8_UINT }, // D32
+		{ DXGI_FORMAT_R32_TYPELESS,       DXGI_FORMAT_R32_FLOAT,             DXGI_FORMAT_D32_FLOAT         }, // D16F
+		{ DXGI_FORMAT_R32_TYPELESS,       DXGI_FORMAT_R32_FLOAT,             DXGI_FORMAT_D32_FLOAT         }, // D24F
+		{ DXGI_FORMAT_R32_TYPELESS,       DXGI_FORMAT_R32_FLOAT,             DXGI_FORMAT_D32_FLOAT         }, // D32F
+		{ DXGI_FORMAT_R24G8_TYPELESS,     DXGI_FORMAT_R24_UNORM_X8_TYPELESS, DXGI_FORMAT_D24_UNORM_S8_UINT }, // D0S8
+	};
+	BX_STATIC_ASSERT(TextureFormat::Count == BX_COUNTOF(s_textureFormat) ); // Adding a TextureFormat without a row here must fail the build.
+
+	static const D3D12_INPUT_ELEMENT_DESC s_attrib[] = // Template input-element descriptors, indexed by Attrib::Enum; fillVertexDecl copies a row and then overrides Format and AlignedByteOffset.
+	{
+		{ "POSITION",     0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 },
+		{ "NORMAL",       0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 },
+		{ "TANGENT",      0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 },
+		{ "BITANGENT",    0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 },
+		{ "COLOR",        0, DXGI_FORMAT_R8G8B8A8_UINT,   0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 },
+		{ "COLOR",        1, DXGI_FORMAT_R8G8B8A8_UINT,   0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 },
+		{ "BLENDINDICES", 0, DXGI_FORMAT_R8G8B8A8_UINT,   0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 },
+		{ "BLENDWEIGHT",  0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 },
+		{ "TEXCOORD",     0, DXGI_FORMAT_R32G32_FLOAT,    0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 },
+		{ "TEXCOORD",     1, DXGI_FORMAT_R32G32_FLOAT,    0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 },
+		{ "TEXCOORD",     2, DXGI_FORMAT_R32G32_FLOAT,    0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 },
+		{ "TEXCOORD",     3, DXGI_FORMAT_R32G32_FLOAT,    0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 },
+		{ "TEXCOORD",     4, DXGI_FORMAT_R32G32_FLOAT,    0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 },
+		{ "TEXCOORD",     5, DXGI_FORMAT_R32G32_FLOAT,    0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 },
+		{ "TEXCOORD",     6, DXGI_FORMAT_R32G32_FLOAT,    0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 },
+		{ "TEXCOORD",     7, DXGI_FORMAT_R32G32_FLOAT,    0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 },
+	};
+	BX_STATIC_ASSERT(Attrib::Count == BX_COUNTOF(s_attrib) ); // One template row is required per Attrib::Enum value.
+
+	static const DXGI_FORMAT s_attribType[][4][2] = // Vertex formats looked up as [AttribType][componentCount-1][normalized] (see fillVertexDecl). 3-component 8-bit and 16-bit entries reuse the 4-component format.
+	{
+		{
+			{ DXGI_FORMAT_R8_UINT,            DXGI_FORMAT_R8_UNORM           },
+			{ DXGI_FORMAT_R8G8_UINT,          DXGI_FORMAT_R8G8_UNORM         },
+			{ DXGI_FORMAT_R8G8B8A8_UINT,      DXGI_FORMAT_R8G8B8A8_UNORM     },
+			{ DXGI_FORMAT_R8G8B8A8_UINT,      DXGI_FORMAT_R8G8B8A8_UNORM     },
+		},
+		{
+			{ DXGI_FORMAT_R16_SINT,           DXGI_FORMAT_R16_SNORM          },
+			{ DXGI_FORMAT_R16G16_SINT,        DXGI_FORMAT_R16G16_SNORM       },
+			{ DXGI_FORMAT_R16G16B16A16_SINT,  DXGI_FORMAT_R16G16B16A16_SNORM },
+			{ DXGI_FORMAT_R16G16B16A16_SINT,  DXGI_FORMAT_R16G16B16A16_SNORM },
+		},
+		{
+			{ DXGI_FORMAT_R16_FLOAT,          DXGI_FORMAT_R16_FLOAT          },
+			{ DXGI_FORMAT_R16G16_FLOAT,       DXGI_FORMAT_R16G16_FLOAT       },
+			{ DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT },
+			{ DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT },
+		},
+		{
+			{ DXGI_FORMAT_R32_FLOAT,          DXGI_FORMAT_R32_FLOAT          },
+			{ DXGI_FORMAT_R32G32_FLOAT,       DXGI_FORMAT_R32G32_FLOAT       },
+			{ DXGI_FORMAT_R32G32B32_FLOAT,    DXGI_FORMAT_R32G32B32_FLOAT    },
+			{ DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT },
+		},
+	};
+	BX_STATIC_ASSERT(AttribType::Count == BX_COUNTOF(s_attribType) ); // One format group is required per AttribType::Enum value.
+
+	// Writes one D3D12 input-element descriptor to _out for every attribute that
+	// is present in _decl (m_attributes[attr] != 0xff), in Attrib::Enum order.
+	// Returns the position one past the last descriptor written, so the caller
+	// can compute the element count or keep appending.
+	static D3D12_INPUT_ELEMENT_DESC* fillVertexDecl(D3D12_INPUT_ELEMENT_DESC* _out, const VertexDecl& _decl)
+	{
+		D3D12_INPUT_ELEMENT_DESC* cursor = _out;
+
+		for (uint32_t idx = 0; idx < Attrib::Count; ++idx)
+		{
+			// 0xff marks an attribute that the declaration does not contain.
+			if (0xff == _decl.m_attributes[idx])
+			{
+				continue;
+			}
+
+			// Start from the template row (semantic name/index, slot, class).
+			memcpy(cursor, &s_attrib[idx], sizeof(D3D12_INPUT_ELEMENT_DESC) );
+
+			if (0 != _decl.m_attributes[idx])
+			{
+				uint8_t count;
+				AttribType::Enum attribType;
+				bool isNormalized;
+				bool isInt;
+				_decl.decode(Attrib::Enum(idx), count, attribType, isNormalized, isInt);
+
+				// Format is selected by type, component count and normalization;
+				// the asInt flag is decoded but not consulted here.
+				cursor->Format = s_attribType[attribType][count-1][isNormalized];
+				cursor->AlignedByteOffset = _decl.m_offset[idx];
+			}
+			else
+			{
+				// A zero attribute word keeps the template format at offset 0.
+				cursor->AlignedByteOffset = 0;
+			}
+
+			++cursor;
+		}
+
+		return cursor;
+	}
+
+	// Records a single transition barrier on _commandList that moves every
+	// subresource of _resource from _stateBefore to _stateAfter.
+	void setResourceBarrier(ID3D12GraphicsCommandList* _commandList, ID3D12Resource* _resource, D3D12_RESOURCE_STATES _stateBefore, D3D12_RESOURCE_STATES _stateAfter)
+	{
+		D3D12_RESOURCE_BARRIER transitionBarrier;
+		transitionBarrier.Type  = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
+		transitionBarrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
+
+		D3D12_RESOURCE_TRANSITION_BARRIER& transition = transitionBarrier.Transition;
+		transition.pResource   = _resource;
+		transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
+		transition.StateBefore = _stateBefore;
+		transition.StateAfter  = _stateAfter;
+
+		_commandList->ResourceBarrier(1, &transitionBarrier);
+	}
+
+	struct HeapProperty // Pairs D3D12 heap properties with the resource state used when creating a committed resource on that heap.
+	{
+		enum Enum // Index into s_heapProperties.
+		{
+			Default,
+			Upload,
+
+			Count
+		};
+
+		D3D12_HEAP_PROPERTIES m_properties; // Passed to ID3D12Device::CreateCommittedResource as the heap description.
+		D3D12_RESOURCE_STATES m_state; // Passed to CreateCommittedResource as the initial resource state.
+	};
+
+	static const HeapProperty s_heapProperties[] = // Indexed by HeapProperty::Enum: default-heap resources start in COMMON, upload-heap resources start in GENERIC_READ; both use node masks of 1.
+	{
+		{ { D3D12_HEAP_TYPE_DEFAULT, D3D12_CPU_PAGE_PROPERTY_UNKNOWN, D3D12_MEMORY_POOL_UNKNOWN, 1, 1 }, D3D12_RESOURCE_STATE_COMMON       },
+		{ { D3D12_HEAP_TYPE_UPLOAD,  D3D12_CPU_PAGE_PROPERTY_UNKNOWN, D3D12_MEMORY_POOL_UNKNOWN, 1, 1 }, D3D12_RESOURCE_STATE_GENERIC_READ },
+	};
+	BX_STATIC_ASSERT(BX_COUNTOF(s_heapProperties) == HeapProperty::Count); // Every HeapProperty::Enum value needs a row.
+
+	// Creates a committed resource described by _resourceDesc on the heap
+	// selected by _heapProperty, using that heap's entry in s_heapProperties for
+	// both the heap properties and the initial resource state. _clearValue is
+	// forwarded unchanged (may be NULL).
+	//
+	// Returns the new resource, or NULL when creation fails and DX_CHECK does
+	// not abort.
+	ID3D12Resource* createCommittedResource(ID3D12Device* _device, HeapProperty::Enum _heapProperty, D3D12_RESOURCE_DESC* _resourceDesc, D3D12_CLEAR_VALUE* _clearValue)
+	{
+		const HeapProperty& heapProperty = s_heapProperties[_heapProperty];
+
+		// Initialized so that a failed call can never hand an indeterminate
+		// pointer back to the caller.
+		ID3D12Resource* resource = NULL;
+		DX_CHECK(_device->CreateCommittedResource(&heapProperty.m_properties
+				, D3D12_HEAP_FLAG_NONE
+				, _resourceDesc
+				, heapProperty.m_state
+				, _clearValue
+				, __uuidof(ID3D12Resource)
+				, (void**)&resource
+				) );
+
+		return resource;
+	}
+
+	// Convenience overload: creates a committed buffer resource of _size bytes
+	// on the heap selected by _heapProperty, with optional resource _flags.
+	// Delegates to the descriptor-based overload with no clear value.
+	ID3D12Resource* createCommittedResource(ID3D12Device* _device, HeapProperty::Enum _heapProperty, uint64_t _size, D3D12_RESOURCE_FLAGS _flags = D3D12_RESOURCE_FLAG_NONE)
+	{
+		D3D12_RESOURCE_DESC bufferDesc;
+
+		// A buffer is a 1D, single-mip, single-sample resource whose width is
+		// its size in bytes.
+		bufferDesc.Dimension        = D3D12_RESOURCE_DIMENSION_BUFFER;
+		bufferDesc.Format           = DXGI_FORMAT_UNKNOWN;
+		bufferDesc.Layout           = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
+		bufferDesc.Alignment        = 0;
+		bufferDesc.Width            = _size;
+		bufferDesc.Height           = 1;
+		bufferDesc.DepthOrArraySize = 1;
+		bufferDesc.MipLevels        = 1;
+		bufferDesc.SampleDesc.Count   = 1;
+		bufferDesc.SampleDesc.Quality = 0;
+		bufferDesc.Flags            = _flags;
+
+		return createCommittedResource(_device, _heapProperty, &bufferDesc, NULL);
+	}
+
+	// Formats a printf-style name and assigns it to _object via SetName.
+	// Does nothing unless BGFX_CONFIG_DEBUG_OBJECT_NAME is enabled. Names longer
+	// than the 2048-byte scratch buffer are truncated.
+	BX_NO_INLINE void setDebugObjectName(ID3D12Object* _object, const char* _format, ...)
+	{
+		if (BX_ENABLED(BGFX_CONFIG_DEBUG_OBJECT_NAME) )
+		{
+			char temp[2048];
+			va_list argList;
+			va_start(argList, _format);
+			// A negative vsnprintf result wraps to a large unsigned value, so
+			// the min() clamps both overflow and error to the buffer end.
+			int size = bx::uint32_min(sizeof(temp)-1, vsnprintf(temp, sizeof(temp), _format, argList) );
+			va_end(argList);
+			temp[size] = '\0';
+
+			// Size the wide buffer by the real wchar_t width rather than
+			// assuming 2 bytes per character.
+			wchar_t* wtemp = (wchar_t*)alloca( (size+1)*sizeof(wchar_t) );
+
+			// On an invalid multibyte sequence mbstowcs returns (size_t)-1 and
+			// may leave the destination unterminated; fall back to an empty
+			// name instead of passing an unterminated string to SetName.
+			if ( (size_t)-1 == mbstowcs(wtemp, temp, size+1) )
+			{
+				wtemp[0] = L'\0';
+			}
+
+			_object->SetName(wtemp);
+		}
+	}
+
+#if USE_D3D12_DYNAMIC_LIB // Entry points below are resolved at runtime in RendererContextD3D12::init() with bx::dlsym instead of being linked statically.
+	static PFN_D3D12_CREATE_DEVICE            D3D12CreateDevice; // From d3d12.dll.
+	static PFN_D3D12_GET_DEBUG_INTERFACE      D3D12GetDebugInterface; // From d3d12.dll.
+	static PFN_D3D12_SERIALIZE_ROOT_SIGNATURE D3D12SerializeRootSignature; // From d3d12.dll.
+	static PFN_CREATE_DXGI_FACTORY            CreateDXGIFactory1; // From dxgi.dll.
+#endif // USE_D3D12_DYNAMIC_LIB
+
+	struct RendererContextD3D12 : public RendererContextI
+	{
+		RendererContextD3D12() // Sets scalar state to defaults only; device and swap-chain creation happens in init().
+			: m_wireframe(false)
+			, m_flags(BGFX_RESET_NONE)
+			, m_fsChanges(0)
+			, m_vsChanges(0)
+			, m_frame(0)
+			, m_backBufferColorIdx(0)
+			, m_rtMsaa(false)
+		{
+		}
+
+		// Brings up the Direct3D 12 backend. In order:
+		//  - optionally loads d3d12.dll/dxgi.dll and resolves their entry points,
+		//  - creates the DXGI factory and enumerates adapters into g_caps,
+		//  - creates the device, command queue wrapper and swap chain,
+		//  - creates RTV/DSV descriptor heaps, scratch buffers, the sampler
+		//    allocator and the root signature,
+		//  - registers predefined uniforms and fills in g_caps (features and
+		//    per-format support),
+		//  - finishes with postReset().
+		// Failures of required steps are reported through BGFX_FATAL.
+		void init()
+		{
+			m_fbh.idx = invalidHandle;
+			memset(m_uniforms, 0, sizeof(m_uniforms) );
+			memset(&m_resolution, 0, sizeof(m_resolution) );
+
+#if USE_D3D12_DYNAMIC_LIB
+			m_d3d12dll = bx::dlopen("d3d12.dll");
+			BGFX_FATAL(NULL != m_d3d12dll, Fatal::UnableToInitialize, "Failed to load d3d12.dll.");
+
+			D3D12CreateDevice = (PFN_D3D12_CREATE_DEVICE)bx::dlsym(m_d3d12dll, "D3D12CreateDevice");
+			BGFX_FATAL(NULL != D3D12CreateDevice, Fatal::UnableToInitialize, "Function D3D12CreateDevice not found.");
+
+			D3D12GetDebugInterface = (PFN_D3D12_GET_DEBUG_INTERFACE)bx::dlsym(m_d3d12dll, "D3D12GetDebugInterface");
+			BGFX_FATAL(NULL != D3D12GetDebugInterface, Fatal::UnableToInitialize, "Function D3D12GetDebugInterface not found.");
+
+			D3D12SerializeRootSignature = (PFN_D3D12_SERIALIZE_ROOT_SIGNATURE)bx::dlsym(m_d3d12dll, "D3D12SerializeRootSignature");
+			BGFX_FATAL(NULL != D3D12SerializeRootSignature, Fatal::UnableToInitialize, "Function D3D12SerializeRootSignature not found.");
+
+			m_dxgidll = bx::dlopen("dxgi.dll");
+			BGFX_FATAL(NULL != m_dxgidll, Fatal::UnableToInitialize, "Failed to load dxgi.dll.");
+
+			CreateDXGIFactory1 = (PFN_CREATE_DXGI_FACTORY)bx::dlsym(m_dxgidll, "CreateDXGIFactory1");
+			BGFX_FATAL(NULL != CreateDXGIFactory1, Fatal::UnableToInitialize, "Function CreateDXGIFactory1 not found.");
+#endif // USE_D3D12_DYNAMIC_LIB
+
+			HRESULT hr;
+
+			hr = CreateDXGIFactory1(__uuidof(IDXGIFactory), (void**)&m_factory);
+			BGFX_FATAL(SUCCEEDED(hr), Fatal::UnableToInitialize, "Unable to create DXGI factory.");
+
+			m_adapter = NULL;
+			m_driverType = D3D_DRIVER_TYPE_HARDWARE;
+
+			// Enumerate adapters: log them, publish their PCI ids in g_caps and
+			// pick the one matching a vendor/device id requested through g_caps.
+			IDXGIAdapter* adapter;
+			for (uint32_t ii = 0; DXGI_ERROR_NOT_FOUND != m_factory->EnumAdapters(ii, &adapter); ++ii)
+			{
+				DXGI_ADAPTER_DESC desc;
+				hr = adapter->GetDesc(&desc);
+				if (SUCCEEDED(hr) )
+				{
+					BX_TRACE("Adapter #%d", ii);
+
+					// Zero-fill and convert one character short of the buffer
+					// so the string is terminated even when wcstombs fills the
+					// buffer or fails part-way through.
+					char description[BX_COUNTOF(desc.Description)];
+					memset(description, 0, sizeof(description) );
+					wcstombs(description, desc.Description, BX_COUNTOF(description)-1);
+					BX_TRACE("\tDescription: %s", description);
+					BX_TRACE("\tVendorId: 0x%08x, DeviceId: 0x%08x, SubSysId: 0x%08x, Revision: 0x%08x"
+							, desc.VendorId
+							, desc.DeviceId
+							, desc.SubSysId
+							, desc.Revision
+							);
+					BX_TRACE("\tMemory: %" PRIi64 " (video), %" PRIi64 " (system), %" PRIi64 " (shared)"
+							, desc.DedicatedVideoMemory
+							, desc.DedicatedSystemMemory
+							, desc.SharedSystemMemory
+							);
+
+					// Never write past the end of g_caps.gpu on systems that
+					// expose more adapters than it has slots.
+					if (ii < BX_COUNTOF(g_caps.gpu) )
+					{
+						g_caps.gpu[ii].vendorId = (uint16_t)desc.VendorId;
+						g_caps.gpu[ii].deviceId = (uint16_t)desc.DeviceId;
+						++g_caps.numGPUs;
+					}
+
+					if ( (BGFX_PCI_ID_NONE != g_caps.vendorId ||             0 != g_caps.deviceId)
+					&&   (BGFX_PCI_ID_NONE == g_caps.vendorId || desc.VendorId == g_caps.vendorId)
+					&&   (0 == g_caps.deviceId                || desc.DeviceId == g_caps.deviceId) )
+					{
+						m_adapter = adapter;
+						m_adapter->AddRef();
+						m_driverType = D3D_DRIVER_TYPE_UNKNOWN;
+					}
+
+					// NOTE(review): unlike the branch above, this path stores
+					// the adapter without AddRef(); confirm the intended
+					// ownership before relying on PerfHUD selection.
+					if (BX_ENABLED(BGFX_CONFIG_DEBUG_PERFHUD)
+					&&  0 != strstr(description, "PerfHUD") )
+					{
+						m_adapter = adapter;
+						m_driverType = D3D_DRIVER_TYPE_REFERENCE;
+					}
+				}
+
+				DX_RELEASE(adapter, adapter == m_adapter ? 1 : 0);
+			}
+
+			if (BX_ENABLED(BGFX_CONFIG_DEBUG) )
+			{
+				ID3D12Debug* debug;
+				hr = D3D12GetDebugInterface(__uuidof(ID3D12Debug), (void**)&debug);
+
+				if (SUCCEEDED(hr) )
+				{
+					debug->EnableDebugLayer();
+
+					// The interface is only needed to flip the switch; drop the
+					// reference so it is not leaked.
+					debug->Release();
+				}
+			}
+
+			hr = D3D12CreateDevice(m_adapter
+					, D3D_FEATURE_LEVEL_11_0
+					, __uuidof(ID3D12Device)
+					, (void**)&m_device
+					);
+			BGFX_FATAL(SUCCEEDED(hr), Fatal::UnableToInitialize, "Unable to create Direct3D12 device.");
+
+			if (NULL != m_adapter)
+			{
+				DX_RELEASE(m_adapter, 2);
+			}
+
+			// Find the description of the adapter the device was actually
+			// created on by matching LUIDs.
+			memset(&m_adapterDesc, 0, sizeof(m_adapterDesc) );
+			LUID luid = m_device->GetAdapterLuid();
+			for (uint32_t ii = 0; DXGI_ERROR_NOT_FOUND != m_factory->EnumAdapters(ii, &adapter); ++ii)
+			{
+				adapter->GetDesc(&m_adapterDesc);
+
+				// EnumAdapters returns an owned reference; only the description
+				// is kept, so release the adapter on every iteration.
+				adapter->Release();
+
+				if (m_adapterDesc.AdapterLuid.LowPart  == luid.LowPart
+				&&  m_adapterDesc.AdapterLuid.HighPart == luid.HighPart)
+				{
+					break;
+				}
+			}
+
+			g_caps.vendorId = (uint16_t)m_adapterDesc.VendorId;
+			g_caps.deviceId = (uint16_t)m_adapterDesc.DeviceId;
+
+			m_architecture.NodeIndex = 0;
+			DX_CHECK(m_device->CheckFeatureSupport(D3D12_FEATURE_ARCHITECTURE, &m_architecture, sizeof(m_architecture) ) );
+			BX_TRACE("GPU Architecture, TileBasedRenderer %d, UMA %d, CacheCoherentUMA %d"
+					, m_architecture.TileBasedRenderer
+					, m_architecture.UMA
+					, m_architecture.CacheCoherentUMA
+					);
+
+			DX_CHECK(m_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &m_options, sizeof(m_options) ) );
+
+			m_cmd.init(m_device);
+
+			// Swap chain: default-sized, windowed, flip-sequential, RGBA8.
+			m_scd.BufferDesc.Width  = BGFX_DEFAULT_WIDTH;
+			m_scd.BufferDesc.Height = BGFX_DEFAULT_HEIGHT;
+			m_scd.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
+			m_scd.BufferDesc.Scaling                 = DXGI_MODE_SCALING_STRETCHED;
+			m_scd.BufferDesc.ScanlineOrdering        = DXGI_MODE_SCANLINE_ORDER_UNSPECIFIED;
+			m_scd.BufferDesc.RefreshRate.Numerator   = 60;
+			m_scd.BufferDesc.RefreshRate.Denominator = 1;
+			m_scd.SampleDesc.Count   = 1;
+			m_scd.SampleDesc.Quality = 0;
+			m_scd.BufferUsage  = DXGI_USAGE_RENDER_TARGET_OUTPUT;
+			m_scd.BufferCount  = bx::uint32_min(BX_COUNTOF(m_backBufferColor), 4);
+			m_scd.OutputWindow = (HWND)g_platformData.nwh;
+			m_scd.Windowed     = true;
+			m_scd.SwapEffect   = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL;
+			m_scd.Flags        = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH;
+
+			BX_CHECK(m_scd.BufferCount <= BX_COUNTOF(m_backBufferColor), "Swap chain buffer count %d (max %d)."
+					, m_scd.BufferCount
+					, BX_COUNTOF(m_backBufferColor)
+					);
+			hr = m_factory->CreateSwapChain(m_cmd.m_commandQueue
+					, &m_scd
+					, &m_swapChain
+					);
+			BGFX_FATAL(SUCCEEDED(hr), Fatal::UnableToInitialize, "Failed to create swap chain.");
+			m_resolution.m_width  = BGFX_DEFAULT_WIDTH;
+			m_resolution.m_height = BGFX_DEFAULT_HEIGHT;
+
+			DX_CHECK(m_factory->MakeWindowAssociation( (HWND)g_platformData.nwh
+					, 0
+					| DXGI_MWA_NO_WINDOW_CHANGES
+					| DXGI_MWA_NO_ALT_ENTER
+					) );
+
+			m_numWindows = 1;
+
+			if (BX_ENABLED(BGFX_CONFIG_DEBUG) )
+			{
+				hr = m_device->QueryInterface(__uuidof(ID3D12InfoQueue), (void**)&m_infoQueue);
+
+				if (SUCCEEDED(hr) )
+				{
+					// Break only on corruption; errors and warnings are logged.
+					m_infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_CORRUPTION, true);
+					m_infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR,      false);
+					m_infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING,    false);
+
+					D3D12_INFO_QUEUE_FILTER filter;
+					memset(&filter, 0, sizeof(filter) );
+
+					D3D12_MESSAGE_CATEGORY catlist[] =
+					{
+						D3D12_MESSAGE_CATEGORY_STATE_SETTING,
+						D3D12_MESSAGE_CATEGORY_EXECUTION,
+					};
+					filter.DenyList.NumCategories = BX_COUNTOF(catlist);
+					filter.DenyList.pCategoryList = catlist;
+					m_infoQueue->PushStorageFilter(&filter);
+
+					DX_RELEASE(m_infoQueue, 19);
+				}
+			}
+
+			// RTV heap: one slot per back buffer plus one per possible frame
+			// buffer attachment.
+			D3D12_DESCRIPTOR_HEAP_DESC rtvDescHeap;
+			rtvDescHeap.NumDescriptors = 0
+					+ BX_COUNTOF(m_backBufferColor)
+					+ BGFX_CONFIG_MAX_FRAME_BUFFERS*BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS
+					;
+			rtvDescHeap.Type     = D3D12_DESCRIPTOR_HEAP_TYPE_RTV;
+			rtvDescHeap.Flags    = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
+			rtvDescHeap.NodeMask = 0;
+			DX_CHECK(m_device->CreateDescriptorHeap(&rtvDescHeap
+					, __uuidof(ID3D12DescriptorHeap)
+					, (void**)&m_rtvDescriptorHeap
+					) );
+
+			D3D12_DESCRIPTOR_HEAP_DESC dsvDescHeap;
+			dsvDescHeap.NumDescriptors = 0
+					+ 1 // reserved for depth backbuffer.
+					+ BGFX_CONFIG_MAX_FRAME_BUFFERS
+					;
+			dsvDescHeap.Type     = D3D12_DESCRIPTOR_HEAP_TYPE_DSV;
+			dsvDescHeap.Flags    = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
+			dsvDescHeap.NodeMask = 0;
+			DX_CHECK(m_device->CreateDescriptorHeap(&dsvDescHeap
+					, __uuidof(ID3D12DescriptorHeap)
+					, (void**)&m_dsvDescriptorHeap
+					) );
+
+			for (uint32_t ii = 0; ii < BX_COUNTOF(m_scratchBuffer); ++ii)
+			{
+				m_scratchBuffer[ii].create(BGFX_CONFIG_MAX_DRAW_CALLS*1024
+						, BGFX_CONFIG_MAX_TEXTURES + BGFX_CONFIG_MAX_SHADERS + BGFX_CONFIG_MAX_DRAW_CALLS
+						);
+			}
+			m_samplerAllocator.create(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER
+					, 1024
+					, BGFX_CONFIG_MAX_TEXTURE_SAMPLERS
+					);
+
+			// Root signature: one descriptor table per Rdt entry (samplers,
+			// SRVs, a single CBV, UAVs), all visible to every shader stage.
+			D3D12_DESCRIPTOR_RANGE descRange[] =
+			{
+				{ D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, BGFX_CONFIG_MAX_TEXTURE_SAMPLERS, 0, 0, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND },
+				{ D3D12_DESCRIPTOR_RANGE_TYPE_SRV,     BGFX_CONFIG_MAX_TEXTURE_SAMPLERS, 0, 0, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND },
+				{ D3D12_DESCRIPTOR_RANGE_TYPE_CBV,     1,                                0, 0, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND },
+				{ D3D12_DESCRIPTOR_RANGE_TYPE_UAV,     BGFX_CONFIG_MAX_TEXTURE_SAMPLERS, 0, 0, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND },
+			};
+			BX_STATIC_ASSERT(BX_COUNTOF(descRange) == Rdt::Count);
+
+			D3D12_ROOT_PARAMETER rootParameter[] =
+			{
+				{ D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, { 1, &descRange[Rdt::Sampler] }, D3D12_SHADER_VISIBILITY_ALL    },
+				{ D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, { 1, &descRange[Rdt::SRV]     }, D3D12_SHADER_VISIBILITY_ALL    },
+				{ D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, { 1, &descRange[Rdt::CBV]     }, D3D12_SHADER_VISIBILITY_ALL    },
+//				{ D3D12_ROOT_PARAMETER_TYPE_CBV,              { 0, 0                        }, D3D12_SHADER_VISIBILITY_ALL    },
+				{ D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, { 1, &descRange[Rdt::UAV]     }, D3D12_SHADER_VISIBILITY_ALL    },
+			};
+// 			rootParameter[Rdt::CBV].Constants.ShaderRegister = 0;
+// 			rootParameter[Rdt::CBV].Constants.RegisterSpace  = 100;
+// 			rootParameter[Rdt::CBV].Constants.Num32BitValues = 0;
+
+			D3D12_ROOT_SIGNATURE_DESC descRootSignature;
+			descRootSignature.NumParameters = BX_COUNTOF(rootParameter);
+			descRootSignature.pParameters   = rootParameter;
+			descRootSignature.NumStaticSamplers = 0;
+			descRootSignature.pStaticSamplers   = NULL;
+			descRootSignature.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT;
+
+			// Start from NULL so that whichever blob the serializer does not
+			// produce can be told apart from a real one when releasing.
+			ID3DBlob* outBlob   = NULL;
+			ID3DBlob* errorBlob = NULL;
+			DX_CHECK(D3D12SerializeRootSignature(&descRootSignature
+					, D3D_ROOT_SIGNATURE_VERSION_1
+					, &outBlob
+					, &errorBlob
+					) );
+
+			DX_CHECK(m_device->CreateRootSignature(0
+					, outBlob->GetBufferPointer()
+					, outBlob->GetBufferSize()
+					, __uuidof(ID3D12RootSignature)
+					, (void**)&m_rootSignature
+					) );
+
+			// The serialized blobs are no longer needed once the root
+			// signature exists; release them so they are not leaked.
+			if (NULL != errorBlob)
+			{
+				errorBlob->Release();
+			}
+
+			if (NULL != outBlob)
+			{
+				outBlob->Release();
+			}
+
+			UniformHandle handle = BGFX_INVALID_HANDLE;
+			for (uint32_t ii = 0; ii < PredefinedUniform::Count; ++ii)
+			{
+				m_uniformReg.add(handle, getPredefinedUniformName(PredefinedUniform::Enum(ii) ), &m_predefinedUniforms[ii]);
+			}
+
+			g_caps.supported |= ( 0
+								| BGFX_CAPS_TEXTURE_3D
+								| BGFX_CAPS_TEXTURE_COMPARE_ALL
+								| BGFX_CAPS_INSTANCING
+								| BGFX_CAPS_VERTEX_ATTRIB_HALF
+								| BGFX_CAPS_FRAGMENT_DEPTH
+								| BGFX_CAPS_BLEND_INDEPENDENT
+								| BGFX_CAPS_COMPUTE
+								| BGFX_CAPS_FRAGMENT_ORDERING
+//								| BGFX_CAPS_SWAP_CHAIN
+								);
+			g_caps.maxTextureSize   = 16384;
+			g_caps.maxFBAttachments = bx::uint32_min(16, BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS);
+
+			// Query per-format support; depth formats are probed through
+			// their DSV format, everything else through the resource format.
+			for (uint32_t ii = 0; ii < TextureFormat::Count; ++ii)
+			{
+				uint8_t support = BGFX_CAPS_FORMAT_TEXTURE_NONE;
+
+				const DXGI_FORMAT fmt = isDepth(TextureFormat::Enum(ii) )
+					? s_textureFormat[ii].m_fmtDsv
+					: s_textureFormat[ii].m_fmt
+					;
+
+				if (DXGI_FORMAT_UNKNOWN != fmt)
+				{
+					D3D12_FEATURE_DATA_FORMAT_SUPPORT data;
+					data.Format = fmt;
+					hr = m_device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &data, sizeof(data) );
+					if (SUCCEEDED(hr) )
+					{
+						support |= 0 != (data.Support1 & (0
+								| D3D12_FORMAT_SUPPORT1_TEXTURE2D
+								| D3D12_FORMAT_SUPPORT1_TEXTURE3D
+								| D3D12_FORMAT_SUPPORT1_TEXTURECUBE
+								) )
+								? BGFX_CAPS_FORMAT_TEXTURE_COLOR
+								: BGFX_CAPS_FORMAT_TEXTURE_NONE
+								;
+
+						support |= 0 != (data.Support1 & (0
+								| D3D12_FORMAT_SUPPORT1_BUFFER
+								| D3D12_FORMAT_SUPPORT1_IA_VERTEX_BUFFER
+								| D3D12_FORMAT_SUPPORT1_IA_INDEX_BUFFER
+								) )
+								? BGFX_CAPS_FORMAT_TEXTURE_VERTEX
+								: BGFX_CAPS_FORMAT_TEXTURE_NONE
+								;
+
+						support |= 0 != (data.Support1 & (0
+								| D3D12_FORMAT_SUPPORT1_SHADER_LOAD
+								) )
+								? BGFX_CAPS_FORMAT_TEXTURE_IMAGE
+								: BGFX_CAPS_FORMAT_TEXTURE_NONE
+								;
+
+						support |= 0 != (data.Support1 & (0
+								| D3D12_FORMAT_SUPPORT1_RENDER_TARGET
+								| D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL
+								) )
+								? BGFX_CAPS_FORMAT_TEXTURE_FRAMEBUFFER
+								: BGFX_CAPS_FORMAT_TEXTURE_NONE
+								;
+					}
+					else
+					{
+						BX_TRACE("CheckFeatureSupport failed with %x for format %s.", hr, getName(TextureFormat::Enum(ii) ) );
+					}
+				}
+
+				g_caps.formats[ii] = support;
+			}
+
+			postReset();
+		}
+
+		// Destroys the renderer-owned resources. Statements run in the order
+		// written: preReset() first, then allocators, caches and resource slots,
+		// descriptor heaps, root signature, swap chain, m_cmd and last the device.
+		// NOTE(review): m_factory (used by updateResolution) and the m_program /
+		// m_uniforms slots are not released in this block - confirm they are
+		// handled elsewhere.
+		~RendererContextD3D12()
+		{
+			// Calls finish() and releases the back buffer resources.
+			preReset();
+
+			m_samplerAllocator.destroy();
+
+			for (uint32_t ii = 0; ii < BX_COUNTOF(m_scratchBuffer); ++ii)
+			{
+				m_scratchBuffer[ii].destroy();
+			}
+
+			m_pipelineStateCache.invalidate();
+
+			// Destroy every slot of the per-handle resource tables.
+			for (uint32_t ii = 0; ii < BX_COUNTOF(m_indexBuffers); ++ii)
+			{
+				m_indexBuffers[ii].destroy();
+			}
+
+			for (uint32_t ii = 0; ii < BX_COUNTOF(m_vertexBuffers); ++ii)
+			{
+				m_vertexBuffers[ii].destroy();
+			}
+
+			for (uint32_t ii = 0; ii < BX_COUNTOF(m_shaders); ++ii)
+			{
+				m_shaders[ii].destroy();
+			}
+
+			for (uint32_t ii = 0; ii < BX_COUNTOF(m_textures); ++ii)
+			{
+				m_textures[ii].destroy();
+			}
+
+			DX_RELEASE(m_rtvDescriptorHeap, 0);
+			DX_RELEASE(m_dsvDescriptorHeap, 0);
+
+			DX_RELEASE(m_rootSignature, 0);
+
+			DX_RELEASE(m_swapChain, 0);
+
+			m_cmd.shutdown();
+
+			DX_RELEASE(m_device, 0);
+
+#if USE_D3D12_DYNAMIC_LIB
+			// Unload the dynamically loaded libraries after the device is released.
+			bx::dlclose(m_d3d12dll);
+			bx::dlclose(m_dxgidll);
+#endif // USE_D3D12_DYNAMIC_LIB
+		}
+
+		// Identifies this backend as the Direct3D 12 renderer.
+		RendererType::Enum getRendererType() const BX_OVERRIDE
+		{
+			return RendererType::Direct3D12;
+		}
+
+		// Returns the renderer name string defined by BGFX_RENDERER_DIRECT3D12_NAME.
+		const char* getRendererName() const BX_OVERRIDE
+		{
+			return BGFX_RENDERER_DIRECT3D12_NAME;
+		}
+
+		static bool isLost(HRESULT _hr)
+		{
+			return DXGI_ERROR_DEVICE_REMOVED == _hr
+				|| DXGI_ERROR_DEVICE_HUNG == _hr
+				|| DXGI_ERROR_DEVICE_RESET == _hr
+				|| DXGI_ERROR_DRIVER_INTERNAL_ERROR == _hr
+				|| DXGI_ERROR_NOT_CURRENTLY_AVAILABLE == _hr
+				;
+		}
+
+		// Presents the swap chains of the secondary windows and then the main
+		// swap chain, and tracks consecutive device-lost failures.
+		//
+		// m_lost counts consecutive Present() failures for which isLost() is
+		// true; BGFX_FATAL reports bgfx::Fatal::DeviceLost once the counter
+		// reaches 10. Any other outcome resets the counter.
+		void flip(HMD& /*_hmd*/) BX_OVERRIDE
+		{
+			if (NULL != m_swapChain)
+			{
+				HRESULT hr = 0;
+				uint32_t syncInterval = !!(m_flags & BGFX_RESET_VSYNC);
+
+				// Window slot 0 is not presented in this loop; m_swapChain is
+				// presented separately below. Stop at the first failure.
+				for (uint32_t ii = 1, num = m_numWindows; ii < num && SUCCEEDED(hr); ++ii)
+				{
+					hr = m_frameBuffers[m_windows[ii].idx].m_swapChain->Present(syncInterval, 0);
+				}
+
+				if (SUCCEEDED(hr) )
+				{
+					// Add BufferCount before subtracting one so that index 0 wraps
+					// to BufferCount-1 for every buffer count. The plain
+					// (idx-1) % count form is only correct for power-of-two counts
+					// because the subtraction wraps around in unsigned arithmetic.
+					const uint32_t bufferCount = m_scd.BufferCount;
+					const uint32_t prevIdx     = (m_backBufferColorIdx + bufferCount - 1) % bufferCount;
+					m_cmd.finish(m_backBufferColorFence[prevIdx]);
+					hr = m_swapChain->Present(syncInterval, 0);
+				}
+
+				if (FAILED(hr)
+				&&  isLost(hr) )
+				{
+					++m_lost;
+					BGFX_FATAL(10 > m_lost, bgfx::Fatal::DeviceLost, "Device is lost. FAILED 0x%08x", hr);
+				}
+				else
+				{
+					m_lost = 0;
+				}
+			}
+		}
+
+		// Creates the index buffer in slot _handle.idx from the size and data of
+		// _mem. The trailing `false` argument is forwarded to create(); its
+		// meaning is defined by the buffer type, which is not visible here.
+		void createIndexBuffer(IndexBufferHandle _handle, Memory* _mem, uint16_t _flags) BX_OVERRIDE
+		{
+			m_indexBuffers[_handle.idx].create(_mem->size, _mem->data, _flags, false);
+		}
+
+		// Destroys the index buffer stored in slot _handle.idx.
+		void destroyIndexBuffer(IndexBufferHandle _handle) BX_OVERRIDE
+		{
+			m_indexBuffers[_handle.idx].destroy();
+		}
+
+		// Stores a byte-wise copy of _decl in slot _handle.idx and passes the
+		// stored copy to dump().
+		void createVertexDecl(VertexDeclHandle _handle, const VertexDecl& _decl) BX_OVERRIDE
+		{
+			VertexDecl& stored = m_vertexDecls[_handle.idx];
+			memcpy(&stored, &_decl, sizeof(VertexDecl) );
+			dump(stored);
+		}
+
+		// Intentionally empty: createVertexDecl only copies the declaration into
+		// m_vertexDecls, so there is nothing to release.
+		void destroyVertexDecl(VertexDeclHandle /*_handle*/) BX_OVERRIDE
+		{
+		}
+
+		// Creates the vertex buffer in slot _handle.idx from the contents of
+		// _mem, associated with the vertex declaration _declHandle.
+		void createVertexBuffer(VertexBufferHandle _handle, Memory* _mem, VertexDeclHandle _declHandle, uint16_t _flags) BX_OVERRIDE
+		{
+			VertexBufferD3D12& vertexBuffer = m_vertexBuffers[_handle.idx];
+			vertexBuffer.create(_mem->size, _mem->data, _declHandle, _flags);
+		}
+
+		// Destroys the vertex buffer stored in slot _handle.idx.
+		void destroyVertexBuffer(VertexBufferHandle _handle) BX_OVERRIDE
+		{
+			VertexBufferD3D12& vertexBuffer = m_vertexBuffers[_handle.idx];
+			vertexBuffer.destroy();
+		}
+
+		// Creates an index buffer of _size bytes in slot _handle.idx without
+		// initial data (NULL is passed as the data pointer).
+		void createDynamicIndexBuffer(IndexBufferHandle _handle, uint32_t _size, uint16_t _flags) BX_OVERRIDE
+		{
+			m_indexBuffers[_handle.idx].create(_size, NULL, _flags, false);
+		}
+
+		// Updates the index buffer in slot _handle.idx at _offset using the
+		// current command list. The number of bytes is clamped to the size of
+		// _mem so no more than the supplied data is read.
+		void updateDynamicIndexBuffer(IndexBufferHandle _handle, uint32_t _offset, uint32_t _size, Memory* _mem) BX_OVERRIDE
+		{
+			const uint32_t numBytes = bx::uint32_min(_size, _mem->size);
+			m_indexBuffers[_handle.idx].update(m_commandList, _offset, numBytes, _mem->data);
+		}
+
+		// Destroys the dynamic index buffer stored in slot _handle.idx.
+		void destroyDynamicIndexBuffer(IndexBufferHandle _handle) BX_OVERRIDE
+		{
+			m_indexBuffers[_handle.idx].destroy();
+		}
+
+		// Creates a vertex buffer of _size bytes in slot _handle.idx without
+		// initial data and with an invalid vertex declaration handle.
+		void createDynamicVertexBuffer(VertexBufferHandle _handle, uint32_t _size, uint16_t _flags) BX_OVERRIDE
+		{
+			VertexDeclHandle noDecl = BGFX_INVALID_HANDLE;
+			VertexBufferD3D12& vertexBuffer = m_vertexBuffers[_handle.idx];
+			vertexBuffer.create(_size, NULL, noDecl, _flags);
+		}
+
+		// Updates the vertex buffer in slot _handle.idx at _offset using the
+		// current command list. The number of bytes is clamped to the size of
+		// _mem so no more than the supplied data is read.
+		void updateDynamicVertexBuffer(VertexBufferHandle _handle, uint32_t _offset, uint32_t _size, Memory* _mem) BX_OVERRIDE
+		{
+			const uint32_t numBytes = bx::uint32_min(_size, _mem->size);
+			VertexBufferD3D12& vertexBuffer = m_vertexBuffers[_handle.idx];
+			vertexBuffer.update(m_commandList, _offset, numBytes, _mem->data);
+		}
+
+		// Destroys the dynamic vertex buffer stored in slot _handle.idx.
+		void destroyDynamicVertexBuffer(VertexBufferHandle _handle) BX_OVERRIDE
+		{
+			VertexBufferD3D12& vertexBuffer = m_vertexBuffers[_handle.idx];
+			vertexBuffer.destroy();
+		}
+
+		// Creates the shader in slot _handle.idx from the blob in _mem.
+		void createShader(ShaderHandle _handle, Memory* _mem) BX_OVERRIDE
+		{
+			m_shaders[_handle.idx].create(_mem);
+		}
+
+		// Destroys the shader stored in slot _handle.idx.
+		void destroyShader(ShaderHandle _handle) BX_OVERRIDE
+		{
+			m_shaders[_handle.idx].destroy();
+		}
+
+		// Builds the program in slot _handle.idx from the vertex shader _vsh and,
+		// when _fsh is a valid handle, the fragment shader _fsh. An invalid _fsh
+		// results in a NULL fragment shader pointer.
+		void createProgram(ProgramHandle _handle, ShaderHandle _vsh, ShaderHandle _fsh) BX_OVERRIDE
+		{
+			ProgramD3D12& program = m_program[_handle.idx];
+
+			if (isValid(_fsh) )
+			{
+				program.create(&m_shaders[_vsh.idx], &m_shaders[_fsh.idx]);
+			}
+			else
+			{
+				program.create(&m_shaders[_vsh.idx], NULL);
+			}
+		}
+
+		// Destroys the program stored in slot _handle.idx.
+		void destroyProgram(ProgramHandle _handle) BX_OVERRIDE
+		{
+			ProgramD3D12& program = m_program[_handle.idx];
+			program.destroy();
+		}
+
+		// Creates the texture in slot _handle.idx from the chunk in _mem,
+		// forwarding _flags and _skip unchanged.
+		void createTexture(TextureHandle _handle, Memory* _mem, uint32_t _flags, uint8_t _skip) BX_OVERRIDE
+		{
+			TextureD3D12& texture = m_textures[_handle.idx];
+			texture.create(_mem, _flags, _skip);
+		}
+
+		// No-op in this renderer; all arguments are ignored.
+		void updateTextureBegin(TextureHandle /*_handle*/, uint8_t /*_side*/, uint8_t /*_mip*/) BX_OVERRIDE
+		{
+		}
+
+		// Forwards a texture region update to the texture in slot _handle.idx,
+		// recording it on the current command list.
+		void updateTexture(TextureHandle _handle, uint8_t _side, uint8_t _mip, const Rect& _rect, uint16_t _z, uint16_t _depth, uint16_t _pitch, const Memory* _mem) BX_OVERRIDE
+		{
+			TextureD3D12& texture = m_textures[_handle.idx];
+			texture.update(m_commandList, _side, _mip, _rect, _z, _depth, _pitch, _mem);
+		}
+
+		// No-op in this renderer.
+		void updateTextureEnd() BX_OVERRIDE
+		{
+		}
+
+		// Recreates the texture in slot _handle.idx with the new dimensions.
+		//
+		// A temporary chunk (BGFX_CHUNK_MAGIC_TEX followed by a TextureCreate
+		// header) is assembled, the existing texture is destroyed and then
+		// created again from that chunk. Flags and requested format are carried
+		// over from the existing texture; the result has one mip level, is not a
+		// cube map and has no initial data.
+		void resizeTexture(TextureHandle _handle, uint16_t _width, uint16_t _height) BX_OVERRIDE
+		{
+			TextureD3D12& target = m_textures[_handle.idx];
+
+			// Describe the replacement texture before the old one is destroyed,
+			// because flags and format are read from it.
+			TextureCreate header;
+			header.m_flags   = target.m_flags;
+			header.m_width   = _width;
+			header.m_height  = _height;
+			header.m_sides   = 0;
+			header.m_depth   = 0;
+			header.m_numMips = 1;
+			header.m_format  = target.m_requestedFormat;
+			header.m_cubeMap = false;
+			header.m_mem     = NULL;
+
+			uint32_t chunkSize = sizeof(uint32_t) + sizeof(TextureCreate);
+			const Memory* chunk = alloc(chunkSize);
+
+			bx::StaticMemoryBlockWriter chunkWriter(chunk->data, chunk->size);
+			uint32_t chunkMagic = BGFX_CHUNK_MAGIC_TEX;
+			bx::write(&chunkWriter, chunkMagic);
+			bx::write(&chunkWriter, header);
+
+			target.destroy();
+			target.create(chunk, header.m_flags, 0);
+
+			release(chunk);
+		}
+
+		// Destroys the texture stored in slot _handle.idx.
+		void destroyTexture(TextureHandle _handle) BX_OVERRIDE
+		{
+			TextureD3D12& texture = m_textures[_handle.idx];
+			texture.destroy();
+		}
+
+		// Creates the frame buffer in slot _handle.idx from _num texture
+		// attachments listed in _textureHandles.
+		void createFrameBuffer(FrameBufferHandle _handle, uint8_t _num, const TextureHandle* _textureHandles) BX_OVERRIDE
+		{
+			FrameBufferD3D12& frameBuffer = m_frameBuffers[_handle.idx];
+			frameBuffer.create(_num, _textureHandles);
+		}
+
+		// Creates a window frame buffer for the native window handle _nwh.
+		//
+		// The frame buffer takes the next free entry of the dense m_windows
+		// list; that index is handed to the frame buffer so it can be located
+		// again when it is destroyed.
+		void createFrameBuffer(FrameBufferHandle _handle, void* _nwh, uint32_t _width, uint32_t _height, TextureFormat::Enum _depthFormat) BX_OVERRIDE
+		{
+			const uint16_t windowSlot = m_numWindows;
+			++m_numWindows;
+
+			m_windows[windowSlot] = _handle;
+
+			FrameBufferD3D12& frameBuffer = m_frameBuffers[_handle.idx];
+			frameBuffer.create(windowSlot, _nwh, _width, _height, _depthFormat);
+		}
+
+		// Destroys the frame buffer in slot _handle.idx and, if it was a window
+		// frame buffer, removes it from the dense m_windows list.
+		//
+		// destroy() returns the dense index the frame buffer occupied, or
+		// UINT16_MAX when there is no m_windows entry to remove. Removal is a
+		// swap-with-last: the last entry is moved into the vacated slot and the
+		// moved frame buffer is told its new dense index.
+		void destroyFrameBuffer(FrameBufferHandle _handle) BX_OVERRIDE
+		{
+			uint16_t denseIdx = m_frameBuffers[_handle.idx].destroy();
+			if (UINT16_MAX != denseIdx)
+			{
+				--m_numWindows;
+
+				// Skip the swap when the destroyed frame buffer was itself the
+				// last entry; otherwise the code below would write a dense index
+				// back into the frame buffer that was just destroyed.
+				if (m_numWindows > 1
+				&&  m_numWindows != denseIdx)
+				{
+					FrameBufferHandle handle = m_windows[m_numWindows];
+					m_windows[denseIdx] = handle;
+					m_frameBuffers[handle.idx].m_denseIdx = denseIdx;
+				}
+			}
+		}
+
+		// Allocates zero-initialized storage for a uniform and registers it
+		// under _name.
+		//
+		// Storage left in the slot from an earlier uniform is freed first. The
+		// size is the per-type size times _num, rounded up by BX_ALIGN_16.
+		void createUniform(UniformHandle _handle, UniformType::Enum _type, uint16_t _num, const char* _name) BX_OVERRIDE
+		{
+			void*& slot = m_uniforms[_handle.idx];
+			if (NULL != slot)
+			{
+				BX_FREE(g_allocator, slot);
+			}
+
+			const uint32_t numBytes = BX_ALIGN_16(g_uniformTypeSize[_type] * _num);
+			slot = BX_ALLOC(g_allocator, numBytes);
+			memset(slot, 0, numBytes);
+
+			m_uniformReg.add(_handle, _name, slot);
+		}
+
+		// Frees the storage of the uniform in slot _handle.idx and clears the
+		// slot so createUniform does not free it a second time.
+		void destroyUniform(UniformHandle _handle) BX_OVERRIDE
+		{
+			void*& slot = m_uniforms[_handle.idx];
+			BX_FREE(g_allocator, slot);
+			slot = NULL;
+		}
+
+		// Not implemented in this renderer; the file path is ignored.
+		void saveScreenShot(const char* /*_filePath*/) BX_OVERRIDE
+		{
+		}
+
+		// Not implemented in this renderer; both arguments are ignored.
+		void updateViewName(uint8_t /*_id*/, const char* /*_name*/) BX_OVERRIDE
+		{
+		}
+
+		// Copies _size bytes from _data into the storage of the uniform in slot
+		// _loc. No bounds check is performed against the size allocated by
+		// createUniform.
+		void updateUniform(uint16_t _loc, const void* _data, uint32_t _size) BX_OVERRIDE
+		{
+			void* storage = m_uniforms[_loc];
+			memcpy(storage, _data, _size);
+		}
+
+		// Not implemented in this renderer; both arguments are ignored.
+		void setMarker(const char* /*_marker*/, uint32_t /*_size*/) BX_OVERRIDE
+		{
+		}
+
+		// Declaration only; the body is defined outside the class definition.
+		void submit(Frame* _render, ClearQuad& _clearQuad, TextVideoMemBlitter& _textVideoMemBlitter) BX_OVERRIDE;
+
+		// Records the command-list state needed by blitRender() to draw the
+		// text overlay of _blitter onto the back buffer: render target,
+		// viewport, pipeline state, projection constants, sampler and texture
+		// descriptor tables, vertex/index buffers and primitive topology.
+		void blitSetup(TextVideoMemBlitter& _blitter) BX_OVERRIDE
+		{
+			const uint32_t width  = m_scd.BufferDesc.Width;
+			const uint32_t height = m_scd.BufferDesc.Height;
+
+			// An invalid handle selects the back buffer in setFrameBuffer().
+			FrameBufferHandle fbh = BGFX_INVALID_HANDLE;
+			setFrameBuffer(fbh, false);
+
+			D3D12_VIEWPORT vp;
+			vp.TopLeftX = 0;
+			vp.TopLeftY = 0;
+			vp.Width    = (float)width;
+			vp.Height   = (float)height;
+			vp.MinDepth = 0.0f;
+			vp.MaxDepth = 1.0f;
+			m_commandList->RSSetViewports(1, &vp);
+
+			const uint64_t state = 0
+				| BGFX_STATE_RGB_WRITE
+				| BGFX_STATE_ALPHA_WRITE
+				| BGFX_STATE_DEPTH_TEST_ALWAYS
+				;
+
+			// getPipelineState() and commitShaderConstants() read shader sizes
+			// through m_currentProgram, so it has to be the blitter program that
+			// is bound below rather than a hard-coded slot 0.
+			m_currentProgram = &m_program[_blitter.m_program.idx];
+			ID3D12PipelineState* pso = getPipelineState(state
+				, packStencil(BGFX_STENCIL_DEFAULT, BGFX_STENCIL_DEFAULT)
+				, _blitter.m_vb->decl.idx
+				, _blitter.m_program.idx
+				, 0
+				);
+			m_commandList->SetPipelineState(pso);
+			m_commandList->SetGraphicsRootSignature(m_rootSignature);
+
+			// Orthographic projection covering the back buffer, origin top-left.
+			float proj[16];
+			bx::mtxOrtho(proj, 0.0f, (float)width, (float)height, 0.0f, 0.0f, 1000.0f);
+
+			// The projection goes into the first predefined uniform of the
+			// blitter program (4 registers = one 4x4 matrix).
+			PredefinedUniform& predefined = m_program[_blitter.m_program.idx].m_predefined[0];
+			uint8_t flags = predefined.m_type;
+			setShaderUniform(flags, predefined.m_loc, proj, 4);
+
+			D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle;
+			commitShaderConstants(gpuHandle);
+
+			ID3D12DescriptorHeap* heaps[] =
+			{
+				m_samplerAllocator.getHeap(),
+				m_scratchBuffer[m_backBufferColorIdx].getHeap(),
+			};
+			m_commandList->SetDescriptorHeaps(BX_COUNTOF(heaps), heaps);
+			m_commandList->SetGraphicsRootDescriptorTable(Rdt::CBV, gpuHandle);
+
+			// Only the first sampler slot is used; the rest are zero-initialized.
+			TextureD3D12& texture = m_textures[_blitter.m_texture.idx];
+			uint32_t samplerFlags[BGFX_CONFIG_MAX_TEXTURE_SAMPLERS] = { texture.m_flags & BGFX_TEXTURE_SAMPLER_BITS_MASK };
+			uint16_t samplerStateIdx = getSamplerState(samplerFlags);
+			m_commandList->SetGraphicsRootDescriptorTable(Rdt::Sampler, m_samplerAllocator.get(samplerStateIdx) );
+			D3D12_GPU_DESCRIPTOR_HANDLE srvHandle;
+			m_scratchBuffer[m_backBufferColorIdx].alloc(srvHandle, texture);
+			m_commandList->SetGraphicsRootDescriptorTable(Rdt::SRV, srvHandle);
+
+			VertexBufferD3D12& vb  = m_vertexBuffers[_blitter.m_vb->handle.idx];
+			const VertexDecl& vertexDecl = m_vertexDecls[_blitter.m_vb->decl.idx];
+			D3D12_VERTEX_BUFFER_VIEW viewDesc;
+			viewDesc.BufferLocation = vb.m_ptr->GetGPUVirtualAddress();
+			viewDesc.StrideInBytes  = vertexDecl.m_stride;
+			viewDesc.SizeInBytes    = vb.m_size;
+			m_commandList->IASetVertexBuffers(0, 1, &viewDesc);
+
+			// Blitter indices are 16-bit.
+			const BufferD3D12& ib = m_indexBuffers[_blitter.m_ib->handle.idx];
+			D3D12_INDEX_BUFFER_VIEW ibv;
+			ibv.Format         = DXGI_FORMAT_R16_UINT;
+			ibv.BufferLocation = ib.m_ptr->GetGPUVirtualAddress();
+			ibv.SizeInBytes    = ib.m_size;
+			m_commandList->IASetIndexBuffer(&ibv);
+
+			m_commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
+		}
+
+		// Uploads the blitter's index and vertex data and issues one indexed
+		// draw of _numIndices indices. Does nothing when the index count maps to
+		// zero vertices (the vertex count is _numIndices*4/6).
+		void blitRender(TextVideoMemBlitter& _blitter, uint32_t _numIndices) BX_OVERRIDE
+		{
+			const uint32_t numVertices = _numIndices*4/6;
+			if (0 == numVertices)
+			{
+				return;
+			}
+
+			// Two bytes per index.
+			m_indexBuffers[_blitter.m_ib->handle.idx].update(m_commandList
+				, 0
+				, _numIndices*2
+				, _blitter.m_ib->data
+				);
+			m_vertexBuffers[_blitter.m_vb->handle.idx].update(m_commandList
+				, 0
+				, numVertices*_blitter.m_decl.m_stride
+				, _blitter.m_vb->data
+				, true
+				);
+
+			m_commandList->DrawIndexedInstanced(_numIndices, 1, 0, 0, 0);
+		}
+
+		// Releases everything that depends on the swap chain buffers so the
+		// swap chain can be resized, recreated or destroyed. Counterpart of
+		// postReset().
+		void preReset()
+		{
+			finish();
+
+			// The second DX_RELEASE argument is the reference count expected after
+			// the release; here it decreases by one per released back buffer.
+			for (uint32_t ii = 0, num = m_scd.BufferCount; ii < num; ++ii)
+			{
+				DX_RELEASE(m_backBufferColor[ii], num-1-ii);
+			}
+			DX_RELEASE(m_backBufferDepthStencil, 0);
+
+			for (uint32_t ii = 0; ii < BX_COUNTOF(m_frameBuffers); ++ii)
+			{
+				m_frameBuffers[ii].preReset();
+			}
+
+			invalidateCache();
+
+//			capturePreReset();
+		}
+
+		// Recreates everything that depends on the swap chain buffers after the
+		// swap chain was created or resized. Counterpart of preReset().
+		//
+		// Steps: fetch each back buffer and create its render target view,
+		// create the back buffer depth-stencil resource and its view, let every
+		// frame buffer run its own postReset(), and allocate a command list.
+		void postReset()
+		{
+			uint32_t rtvDescriptorSize = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
+
+			// Back buffer ii uses the ii-th descriptor of the RTV heap.
+			for (uint32_t ii = 0, num = m_scd.BufferCount; ii < num; ++ii)
+			{
+				D3D12_CPU_DESCRIPTOR_HANDLE handle = m_rtvDescriptorHeap->GetCPUDescriptorHandleForHeapStart();
+				handle.ptr += ii * rtvDescriptorSize;
+				DX_CHECK(m_swapChain->GetBuffer(ii
+						, __uuidof(ID3D12Resource)
+						, (void**)&m_backBufferColor[ii]
+						) );
+				m_device->CreateRenderTargetView(m_backBufferColor[ii], NULL, handle);
+			}
+
+			D3D12_RESOURCE_DESC resourceDesc;
+			resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
+			resourceDesc.Alignment = 0;
+			resourceDesc.Width     = bx::uint32_max(m_resolution.m_width,  1);
+			resourceDesc.Height    = bx::uint32_max(m_resolution.m_height, 1);
+			resourceDesc.DepthOrArraySize   = 1;
+			// A depth-stencil buffer needs exactly one mip level. A value of 0
+			// would make D3D12 allocate the full mip chain.
+			resourceDesc.MipLevels          = 1;
+			resourceDesc.Format             = DXGI_FORMAT_D24_UNORM_S8_UINT;
+			resourceDesc.SampleDesc.Count   = 1;
+			resourceDesc.SampleDesc.Quality = 0;
+			resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
+			resourceDesc.Flags  = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
+
+			D3D12_CLEAR_VALUE clearValue;
+			clearValue.Format = resourceDesc.Format;
+			clearValue.DepthStencil.Depth   = 1.0f;
+			clearValue.DepthStencil.Stencil = 0;
+
+			m_backBufferDepthStencil = createCommittedResource(m_device, HeapProperty::Default, &resourceDesc, &clearValue);
+
+			D3D12_DEPTH_STENCIL_VIEW_DESC dsvDesc;
+			ZeroMemory(&dsvDesc, sizeof(dsvDesc) );
+			dsvDesc.Format        = resourceDesc.Format;
+			dsvDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D;
+			dsvDesc.Flags         = D3D12_DSV_FLAGS(0)
+// 				| D3D12_DSV_FLAG_READ_ONLY_DEPTH
+// 				| D3D12_DSV_FLAG_READ_ONLY_STENCIL
+				;
+
+			// The back buffer depth-stencil view is the first DSV heap entry.
+			m_device->CreateDepthStencilView(m_backBufferDepthStencil
+				, &dsvDesc
+				, m_dsvDescriptorHeap->GetCPUDescriptorHandleForHeapStart()
+				);
+
+			for (uint32_t ii = 0; ii < BX_COUNTOF(m_frameBuffers); ++ii)
+			{
+				m_frameBuffers[ii].postReset();
+			}
+
+			m_commandList = m_cmd.alloc();
+//			capturePostReset();
+		}
+
+		// Invalidates the pipeline state cache and the sampler state cache.
+		void invalidateCache()
+		{
+			m_pipelineStateCache.invalidate();
+			m_samplerStateCache.invalidate();
+		}
+
+		// Fills s_msaa[1..] with the sample description to use for each MSAA
+		// level listed in s_checkMsaa.
+		//
+		// For every level the device is queried for the swap chain format. A
+		// supported level stores its sample count and highest quality level; an
+		// unsupported level falls back to the last supported entry (entry 0 if
+		// none was supported so far).
+		void updateMsaa()
+		{
+			for (uint32_t ii = 1, last = 0; ii < BX_COUNTOF(s_msaa); ++ii)
+			{
+				uint32_t msaa = s_checkMsaa[ii];
+
+				D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS data;
+				// Clear the whole query structure, not just sizeof(msaa) bytes.
+				memset(&data, 0, sizeof(data) );
+				data.Format = m_scd.BufferDesc.Format;
+				data.SampleCount = msaa;
+				data.Flags = D3D12_MULTISAMPLE_QUALITY_LEVELS_FLAG_NONE;
+				HRESULT hr = m_device->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, &data, sizeof(data) );
+
+				// NOTE(review): the query result is deliberately overridden here,
+				// which reports every MSAA level as unsupported and so keeps MSAA
+				// disabled. Kept as-is; remove once MSAA is meant to be enabled.
+				data.NumQualityLevels = 0;
+
+				if (SUCCEEDED(hr)
+				&&  0 < data.NumQualityLevels)
+				{
+					s_msaa[ii].Count   = data.SampleCount;
+					s_msaa[ii].Quality = data.NumQualityLevels - 1;
+					last = ii;
+				}
+				else
+				{
+					s_msaa[ii] = s_msaa[last];
+				}
+			}
+		}
+
+		// Applies a new resolution / reset-flag set if it differs from the
+		// current swap chain size or flags.
+		//
+		// When the MSAA bits are unchanged the existing swap chain is resized in
+		// place; otherwise the MSAA table is refreshed and the swap chain is
+		// released and created again. Both paths are wrapped in
+		// preReset()/postReset().
+		void updateResolution(const Resolution& _resolution)
+		{
+			if ( (uint32_t)m_scd.BufferDesc.Width != _resolution.m_width
+			||   (uint32_t)m_scd.BufferDesc.Height != _resolution.m_height
+			||   m_flags != _resolution.m_flags)
+			{
+				// Must be computed before m_flags is overwritten below.
+				bool resize = (m_flags&BGFX_RESET_MSAA_MASK) == (_resolution.m_flags&BGFX_RESET_MSAA_MASK);
+				m_flags = _resolution.m_flags;
+
+				m_textVideoMem.resize(false, _resolution.m_width, _resolution.m_height);
+				m_textVideoMem.clear();
+
+				m_resolution = _resolution;
+
+				m_scd.BufferDesc.Width = _resolution.m_width;
+				m_scd.BufferDesc.Height = _resolution.m_height;
+
+				// Back buffer references have to be dropped before the swap chain
+				// is resized or released.
+				preReset();
+
+				if (resize)
+				{
+					DX_CHECK(m_swapChain->ResizeBuffers(m_scd.BufferCount
+							, m_scd.BufferDesc.Width
+							, m_scd.BufferDesc.Height
+							, m_scd.BufferDesc.Format
+							, m_scd.Flags
+							) );
+				}
+				else
+				{
+					updateMsaa();
+					m_scd.SampleDesc = s_msaa[(m_flags&BGFX_RESET_MSAA_MASK)>>BGFX_RESET_MSAA_SHIFT];
+
+					DX_RELEASE(m_swapChain, 0);
+
+					HRESULT hr;
+					hr = m_factory->CreateSwapChain(m_cmd.m_commandQueue
+							, &m_scd
+							, &m_swapChain
+							);
+					BGFX_FATAL(SUCCEEDED(hr), bgfx::Fatal::UnableToInitialize, "Failed to create swap chain.");
+				}
+
+				postReset();
+			}
+		}
+
+		// Copies _numRegs 16-byte registers from _val into the CPU-side scratch
+		// copy of the constants, starting at _regIndex, and adds _numRegs to the
+		// matching change counter.
+		//
+		// BGFX_UNIFORM_FRAGMENTBIT in _flags selects the fragment shader
+		// scratch; without it the vertex shader scratch is written.
+		void setShaderUniform(uint8_t _flags, uint16_t _regIndex, const void* _val, uint16_t _numRegs)
+		{
+			const uint32_t numBytes = _numRegs*16;
+
+			if (0 == (_flags&BGFX_UNIFORM_FRAGMENTBIT) )
+			{
+				memcpy(&m_vsScratch[_regIndex], _val, numBytes);
+				m_vsChanges += _numRegs;
+			}
+			else
+			{
+				memcpy(&m_fsScratch[_regIndex], _val, numBytes);
+				m_fsChanges += _numRegs;
+			}
+		}
+
+		// Forwards to setShaderUniform() with the arguments unchanged.
+		void setShaderUniform4f(uint8_t _flags, uint16_t _regIndex, const void* _val, uint16_t _numRegs)
+		{
+			setShaderUniform(_flags, _regIndex, _val, _numRegs);
+		}
+
+		// Forwards to setShaderUniform() with the arguments unchanged.
+		void setShaderUniform4x4f(uint8_t _flags, uint16_t _regIndex, const void* _val, uint16_t _numRegs)
+		{
+			setShaderUniform(_flags, _regIndex, _val, _numRegs);
+		}
+
+		// Copies the pending vertex and fragment shader constants of the
+		// current program into a fresh scratch-buffer allocation and returns
+		// the allocation's GPU descriptor handle through gpuHandle.
+		//
+		// Layout of the allocation: vertex shader constants first, fragment
+		// shader constants (if the program has a fragment shader) right after
+		// them. The total is rounded up to
+		// D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT. Both change counters
+		// are reset for the stages that were copied.
+		void commitShaderConstants(D3D12_GPU_DESCRIPTOR_HANDLE& gpuHandle)
+		{
+			const bool hasFragment = NULL != m_currentProgram->m_fsh;
+
+			const uint32_t vsBytes = m_currentProgram->m_vsh->m_size;
+			const uint32_t fsBytes = hasFragment ? m_currentProgram->m_fsh->m_size : 0;
+
+			const uint32_t totalBytes = bx::strideAlign(vsBytes + fsBytes
+				, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT
+				);
+			uint8_t* dst = (uint8_t*)m_scratchBuffer[m_backBufferColorIdx].alloc(gpuHandle, totalBytes);
+
+			memcpy(dst, m_vsScratch, vsBytes);
+			m_vsChanges = 0;
+
+			if (hasFragment)
+			{
+				memcpy(dst + vsBytes, m_fsScratch, fsBytes);
+				m_fsChanges = 0;
+			}
+		}
+
+		// Binds the render targets of _fbh on the current command list, or the
+		// back buffer when _fbh is invalid, and records the matching resource
+		// state transitions.
+		//
+		// _fbh   Frame buffer to bind; an invalid handle selects the back buffer.
+		// _msaa  Stored in m_rtMsaa; not otherwise used in this function.
+		void setFrameBuffer(FrameBufferHandle _fbh, bool _msaa = true)
+		{
+			// Leaving a different, valid frame buffer: transition its attachments
+			// to the pixel-shader-resource state.
+			if (isValid(m_fbh)
+			&&  m_fbh.idx != _fbh.idx)
+			{
+				const FrameBufferD3D12& frameBuffer = m_frameBuffers[m_fbh.idx];
+
+				for (uint8_t ii = 0, num = frameBuffer.m_num; ii < num; ++ii)
+				{
+					TextureD3D12& texture = m_textures[frameBuffer.m_texture[ii].idx];
+					texture.setState(m_commandList, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
+				}
+
+				if (isValid(frameBuffer.m_depth) )
+				{
+					TextureD3D12& texture = m_textures[frameBuffer.m_depth.idx];
+					// Depth textures flagged BGFX_TEXTURE_RT_BUFFER_ONLY are not
+					// transitioned.
+					const bool bufferOnly = 0 != (texture.m_flags&BGFX_TEXTURE_RT_BUFFER_ONLY);
+					if (!bufferOnly)
+					{
+						texture.setState(m_commandList, D3D12_RESOURCE_STATES(0)
+							| D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE
+//							| D3D12_RESOURCE_STATE_DEPTH_READ
+							);
+					}
+				}
+			}
+
+			if (!isValid(_fbh) )
+			{
+				// Back buffer: RTV at index m_backBufferColorIdx of the RTV heap,
+				// DSV at the start of the DSV heap.
+				m_rtvHandle = m_rtvDescriptorHeap->GetCPUDescriptorHandleForHeapStart();
+				uint32_t rtvDescriptorSize = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
+				m_rtvHandle.ptr += m_backBufferColorIdx * rtvDescriptorSize;
+				m_dsvHandle = m_dsvDescriptorHeap->GetCPUDescriptorHandleForHeapStart();
+
+				m_currentColor        = &m_rtvHandle;
+				m_currentDepthStencil = &m_dsvHandle;
+				m_commandList->OMSetRenderTargets(1, m_currentColor, false, m_currentDepthStencil);
+			}
+			else
+			{
+				const FrameBufferD3D12& frameBuffer = m_frameBuffers[_fbh.idx];
+
+				if (0 < frameBuffer.m_num)
+				{
+					// Frame buffer RTVs follow the back buffer RTVs in the heap,
+					// BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS entries per frame buffer.
+					D3D12_CPU_DESCRIPTOR_HANDLE rtvDescriptor = m_rtvDescriptorHeap->GetCPUDescriptorHandleForHeapStart();
+					uint32_t rtvDescriptorSize = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
+					m_rtvHandle.ptr = rtvDescriptor.ptr + (BX_COUNTOF(m_backBufferColor) + _fbh.idx * BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS) * rtvDescriptorSize;
+					m_currentColor  = &m_rtvHandle;
+				}
+				else
+				{
+					m_currentColor = NULL;
+				}
+
+				if (isValid(frameBuffer.m_depth) )
+				{
+					// DSV entry 0 belongs to the back buffer; frame buffer idx uses
+					// entry 1 + idx.
+					D3D12_CPU_DESCRIPTOR_HANDLE dsvDescriptor = m_dsvDescriptorHeap->GetCPUDescriptorHandleForHeapStart();
+					uint32_t dsvDescriptorSize = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_DSV);
+					m_dsvHandle.ptr = dsvDescriptor.ptr + (1 + _fbh.idx) * dsvDescriptorSize;
+					m_currentDepthStencil = &m_dsvHandle;
+				}
+				else
+				{
+					m_currentDepthStencil = NULL;
+				}
+
+				// Transition the new attachments into writable states.
+				for (uint8_t ii = 0, num = frameBuffer.m_num; ii < num; ++ii)
+				{
+					TextureD3D12& texture = m_textures[frameBuffer.m_texture[ii].idx];
+					texture.setState(m_commandList, D3D12_RESOURCE_STATE_RENDER_TARGET);
+				}
+
+				if (isValid(frameBuffer.m_depth) )
+				{
+					TextureD3D12& texture = m_textures[frameBuffer.m_depth.idx];
+					texture.setState(m_commandList, D3D12_RESOURCE_STATE_DEPTH_WRITE);
+				}
+
+				// Third argument true: m_currentColor is passed as a single handle
+				// to a contiguous range of m_num descriptors.
+				m_commandList->OMSetRenderTargets(frameBuffer.m_num
+												, m_currentColor
+												, true //NULL == m_currentDepthStencil
+												, m_currentDepthStencil
+												);
+			}
+
+			m_fbh = _fbh;
+			m_rtMsaa = _msaa;
+		}
+
+		// Fills a D3D12 blend description from the bgfx render state.
+		//
+		// desc    Output; zeroed first, then every render target entry up to
+		//         BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS is written.
+		// _state  bgfx state bits (blend factors, equations, write masks,
+		//         independent-blend flag).
+		// _rgba   Packed per-target blend settings, 11 bits per target, used
+		//         for targets 1.. only when independent blending is enabled.
+		void setBlendState(D3D12_BLEND_DESC& desc, uint64_t _state, uint32_t _rgba = 0)
+		{
+			memset(&desc, 0, sizeof(desc) );
+			desc.IndependentBlendEnable = !!(BGFX_STATE_BLEND_INDEPENDENT & _state);
+
+			D3D12_RENDER_TARGET_BLEND_DESC* drt = &desc.RenderTarget[0];
+			drt->BlendEnable = !!(BGFX_STATE_BLEND_MASK & _state);
+
+			{
+				const uint32_t blend    = uint32_t( (_state & BGFX_STATE_BLEND_MASK         ) >> BGFX_STATE_BLEND_SHIFT);
+				const uint32_t equation = uint32_t( (_state & BGFX_STATE_BLEND_EQUATION_MASK) >> BGFX_STATE_BLEND_EQUATION_SHIFT);
+
+				// Four 4-bit factor indices: source/destination for RGB and alpha.
+				const uint32_t srcRGB = (blend      ) & 0xf;
+				const uint32_t dstRGB = (blend >>  4) & 0xf;
+				const uint32_t srcA   = (blend >>  8) & 0xf;
+				const uint32_t dstA   = (blend >> 12) & 0xf;
+
+				// Two 3-bit equation indices: RGB and alpha.
+				const uint32_t equRGB = (equation     ) & 0x7;
+				const uint32_t equA   = (equation >> 3) & 0x7;
+
+				// s_blendFactor column 0 is used for RGB, column 1 for alpha.
+				drt->SrcBlend       = s_blendFactor[srcRGB][0];
+				drt->DestBlend      = s_blendFactor[dstRGB][0];
+				drt->BlendOp        = s_blendEquation[equRGB];
+
+				drt->SrcBlendAlpha  = s_blendFactor[srcA][1];
+				drt->DestBlendAlpha = s_blendFactor[dstA][1];
+				drt->BlendOpAlpha   = s_blendEquation[equA];
+			}
+
+			uint32_t writeMask = (_state & BGFX_STATE_ALPHA_WRITE)
+					? D3D12_COLOR_WRITE_ENABLE_ALPHA
+					: 0
+					;
+			writeMask |= (_state & BGFX_STATE_RGB_WRITE)
+					? D3D12_COLOR_WRITE_ENABLE_RED
+					| D3D12_COLOR_WRITE_ENABLE_GREEN
+					| D3D12_COLOR_WRITE_ENABLE_BLUE
+					: 0
+					;
+
+			drt->RenderTargetWriteMask = writeMask;
+
+			if (desc.IndependentBlendEnable)
+			{
+				// Each further target consumes the next 11 bits of _rgba:
+				// 4 bits source factor, 4 bits destination factor, 3 bits equation.
+				// The same factor/equation indices drive both RGB and alpha.
+				for (uint32_t ii = 1, rgba = _rgba; ii < BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS; ++ii, rgba >>= 11)
+				{
+					drt = &desc.RenderTarget[ii];
+					drt->BlendEnable = 0 != (rgba & 0x7ff);
+
+					const uint32_t src      = (rgba     ) & 0xf;
+					const uint32_t dst      = (rgba >> 4) & 0xf;
+					const uint32_t equation = (rgba >> 8) & 0x7;
+
+					drt->SrcBlend       = s_blendFactor[src][0];
+					drt->DestBlend      = s_blendFactor[dst][0];
+					drt->BlendOp        = s_blendEquation[equation];
+
+					drt->SrcBlendAlpha  = s_blendFactor[src][1];
+					drt->DestBlendAlpha = s_blendFactor[dst][1];
+					drt->BlendOpAlpha   = s_blendEquation[equation];
+
+					drt->RenderTargetWriteMask = writeMask;
+				}
+			}
+			else
+			{
+				// Without independent blending every target copies target 0
+				// (drt still points at RenderTarget[0] here).
+				for (uint32_t ii = 1; ii < BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS; ++ii)
+				{
+					memcpy(&desc.RenderTarget[ii], drt, sizeof(D3D12_RENDER_TARGET_BLEND_DESC) );
+				}
+			}
+		}
+
+		// Fills every field of a D3D12 rasterizer description.
+		//
+		// Only the cull mode, the fill mode (_wireframe) and MultisampleEnable
+		// (BGFX_STATE_MSAA) depend on the inputs; depth bias, depth clipping,
+		// line antialiasing, forced sample count and conservative rasterization
+		// are always disabled, and front faces are clockwise.
+		void setRasterizerState(D3D12_RASTERIZER_DESC& desc, uint64_t _state, bool _wireframe = false)
+		{
+			const uint32_t cullIdx = (_state&BGFX_STATE_CULL_MASK) >> BGFX_STATE_CULL_SHIFT;
+
+			// State-dependent fields.
+			if (_wireframe)
+			{
+				desc.FillMode = D3D12_FILL_MODE_WIREFRAME;
+			}
+			else
+			{
+				desc.FillMode = D3D12_FILL_MODE_SOLID;
+			}
+			desc.CullMode          = s_cullMode[cullIdx];
+			desc.MultisampleEnable = !!(_state&BGFX_STATE_MSAA);
+
+			// Fixed fields.
+			desc.FrontCounterClockwise = false;
+			desc.DepthBias             = 0;
+			desc.DepthBiasClamp        = 0.0f;
+			desc.SlopeScaledDepthBias  = 0.0f;
+			desc.DepthClipEnable       = false;
+			desc.AntialiasedLineEnable = false;
+			desc.ForcedSampleCount     = 0;
+			desc.ConservativeRaster    = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF;
+		}
+
+		// Fills a D3D12 depth-stencil description from the bgfx depth state bits
+		// in _state and the packed front/back stencil state in _stencil.
+		void setDepthStencilState(D3D12_DEPTH_STENCIL_DESC& desc, uint64_t _state, uint64_t _stencil = 0)
+		{
+			const uint32_t fstencil = unpackStencil(0, _stencil);
+
+			memset(&desc, 0, sizeof(desc) );
+			// A depth test function index of 0 means the depth test is disabled.
+			uint32_t func = (_state&BGFX_STATE_DEPTH_TEST_MASK)>>BGFX_STATE_DEPTH_TEST_SHIFT;
+			desc.DepthEnable = 0 != func;
+			desc.DepthWriteMask = !!(BGFX_STATE_DEPTH_WRITE & _state)
+				? D3D12_DEPTH_WRITE_MASK_ALL
+				: D3D12_DEPTH_WRITE_MASK_ZERO
+				;
+			desc.DepthFunc = s_cmpFunc[func];
+
+			// The back-face state is used only when it is set and differs from
+			// the front-face state; otherwise the front-face state applies to both.
+			uint32_t bstencil = unpackStencil(1, _stencil);
+			uint32_t frontAndBack = bstencil != BGFX_STENCIL_NONE && bstencil != fstencil;
+			bstencil = frontAndBack ? bstencil : fstencil;
+
+			desc.StencilEnable    = 0 != _stencil;
+			// The read mask comes from the front-face state; the write mask is
+			// always 0xff.
+			desc.StencilReadMask  = (fstencil & BGFX_STENCIL_FUNC_RMASK_MASK) >> BGFX_STENCIL_FUNC_RMASK_SHIFT;
+			desc.StencilWriteMask = 0xff;
+
+			desc.FrontFace.StencilFailOp      = s_stencilOp[(fstencil & BGFX_STENCIL_OP_FAIL_S_MASK) >> BGFX_STENCIL_OP_FAIL_S_SHIFT];
+			desc.FrontFace.StencilDepthFailOp = s_stencilOp[(fstencil & BGFX_STENCIL_OP_FAIL_Z_MASK) >> BGFX_STENCIL_OP_FAIL_Z_SHIFT];
+			desc.FrontFace.StencilPassOp      = s_stencilOp[(fstencil & BGFX_STENCIL_OP_PASS_Z_MASK) >> BGFX_STENCIL_OP_PASS_Z_SHIFT];
+			desc.FrontFace.StencilFunc        = s_cmpFunc[(fstencil & BGFX_STENCIL_TEST_MASK) >> BGFX_STENCIL_TEST_SHIFT];
+
+			desc.BackFace.StencilFailOp       = s_stencilOp[(bstencil & BGFX_STENCIL_OP_FAIL_S_MASK) >> BGFX_STENCIL_OP_FAIL_S_SHIFT];
+			desc.BackFace.StencilDepthFailOp  = s_stencilOp[(bstencil & BGFX_STENCIL_OP_FAIL_Z_MASK) >> BGFX_STENCIL_OP_FAIL_Z_SHIFT];
+			desc.BackFace.StencilPassOp       = s_stencilOp[(bstencil & BGFX_STENCIL_OP_PASS_Z_MASK) >> BGFX_STENCIL_OP_PASS_Z_SHIFT];
+			desc.BackFace.StencilFunc         = s_cmpFunc[(bstencil&BGFX_STENCIL_TEST_MASK) >> BGFX_STENCIL_TEST_SHIFT];
+		}
+
+		// Builds the D3D12 input element array for a vertex declaration combined
+		// with a program's vertex shader inputs, plus per-instance data elements.
+		//
+		// _vertexElements   Output array, filled from index 0.
+		// _vertexDecl       Vertex declaration describing the vertex stream.
+		// _program          Program whose vertex shader attribute mask is applied.
+		// _numInstanceData  Number of 16-byte per-instance elements to add.
+		//
+		// Returns the number of elements written to _vertexElements.
+		uint32_t setInputLayout(D3D12_INPUT_ELEMENT_DESC* _vertexElements, const VertexDecl& _vertexDecl, const ProgramD3D12& _program, uint8_t _numInstanceData)
+		{
+			// Work on a copy so the attribute table can be rewritten.
+			VertexDecl decl;
+			memcpy(&decl, &_vertexDecl, sizeof(VertexDecl) );
+			const uint8_t* attrMask = _program.m_vsh->m_attrMask;
+
+			// Mask each attribute with the shader's attribute mask; a masked
+			// value of 0 is stored as 0xff and a masked value of 0xff as 0.
+			for (uint32_t ii = 0; ii < Attrib::Count; ++ii)
+			{
+				uint8_t mask = attrMask[ii];
+				uint8_t attr = (decl.m_attributes[ii] & mask);
+				decl.m_attributes[ii] = attr == 0 ? 0xff : attr == 0xff ? 0 : attr;
+			}
+
+			D3D12_INPUT_ELEMENT_DESC* elem = fillVertexDecl(_vertexElements, decl);
+			uint32_t num = uint32_t(elem-_vertexElements);
+
+			// Template for a per-instance float4 element, step rate 1.
+			const D3D12_INPUT_ELEMENT_DESC inst = { "TEXCOORD", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA, 1 };
+
+			for (uint32_t ii = 0; ii < _numInstanceData; ++ii)
+			{
+				uint32_t index = 7 - ii; // TEXCOORD7 = i_data0, TEXCOORD6 = i_data1, etc.
+
+				// Reuse an existing TEXCOORD<index> element if the declaration
+				// already produced one; otherwise append a new element.
+				uint32_t jj;
+				D3D12_INPUT_ELEMENT_DESC* curr = _vertexElements;
+				for (jj = 0; jj < num; ++jj)
+				{
+					curr = &_vertexElements[jj];
+					if (0 == strcmp(curr->SemanticName, "TEXCOORD")
+					&&  curr->SemanticIndex == index)
+					{
+						break;
+					}
+				}
+
+				if (jj == num)
+				{
+					curr = elem;
+					++elem;
+				}
+
+				// Instance data is read from input slot 1, 16 bytes per element.
+				memcpy(curr, &inst, sizeof(D3D12_INPUT_ELEMENT_DESC) );
+				curr->InputSlot = 1;
+				curr->SemanticIndex = index;
+				curr->AlignedByteOffset = ii*16;
+			}
+
+			return uint32_t(elem-_vertexElements);
+		}
+
+		// DXBC instruction filter callback: adds an offset to the register
+		// index of every operand that addresses constant buffer 0 with
+		// immediate indices.
+		//
+		// _userData carries the offset itself (not a pointer to it); it is read
+		// back as a 32-bit value through the union below.
+		static void patchCb0(DxbcInstruction& _instruction, void* _userData)
+		{
+			union { void* ptr; uint32_t offset; } cast ={ _userData };
+
+			const uint32_t numOperands = _instruction.numOperands;
+			for (uint32_t idx = 0; idx < numOperands; ++idx)
+			{
+				DxbcOperand& op = _instruction.operand[idx];
+
+				if (DxbcOperandType::ConstantBuffer != op.type)
+				{
+					continue;
+				}
+
+				// regIndex[0] is the constant buffer slot, regIndex[1] the register
+				// within it; only immediate addressing of slot 0 is patched.
+				const bool isImmediateCb0 = DxbcOperandAddrMode::Imm32 == op.addrMode[0]
+					&& 0 == op.regIndex[0]
+					&& DxbcOperandAddrMode::Imm32 == op.addrMode[1]
+					;
+
+				if (isImmediateCb0)
+				{
+					op.regIndex[1] += cast.offset;
+				}
+			}
+		}
+
+		// Returns the compute pipeline state for the program in slot
+		// _programIdx, creating and caching it on first use.
+		//
+		// The cache key is a hash of the compute shader hash only (the compute
+		// shader is stored in the program's m_vsh).
+		ID3D12PipelineState* getPipelineState(uint16_t _programIdx)
+		{
+			ProgramD3D12& program = m_program[_programIdx];
+
+			bx::HashMurmur2A hasher;
+			hasher.begin();
+			hasher.add(program.m_vsh->m_hash);
+			const uint32_t key = hasher.end();
+
+			ID3D12PipelineState* pso = m_pipelineStateCache.find(key);
+
+			if (NULL == pso)
+			{
+				D3D12_COMPUTE_PIPELINE_STATE_DESC desc;
+				memset(&desc, 0, sizeof(desc) );
+
+				desc.pRootSignature     = m_rootSignature;
+				desc.CS.pShaderBytecode = program.m_vsh->m_code->data;
+				desc.CS.BytecodeLength  = program.m_vsh->m_code->size;
+
+				DX_CHECK(m_device->CreateComputePipelineState(&desc
+					, __uuidof(ID3D12PipelineState)
+					, (void**)&pso
+					) );
+				m_pipelineStateCache.add(key, pso);
+			}
+
+			return pso;
+		}
+
+		ID3D12PipelineState* getPipelineState(uint64_t _state, uint64_t _stencil, uint16_t _declIdx, uint16_t _programIdx, uint8_t _numInstanceData)
+		{
+			ProgramD3D12& program = m_program[_programIdx];
+
+			_state &= 0
+				| BGFX_STATE_RGB_WRITE
+				| BGFX_STATE_ALPHA_WRITE
+				| BGFX_STATE_DEPTH_WRITE
+				| BGFX_STATE_DEPTH_TEST_MASK
+				| BGFX_STATE_BLEND_MASK
+				| BGFX_STATE_BLEND_EQUATION_MASK
+				| BGFX_STATE_BLEND_INDEPENDENT
+				| BGFX_STATE_CULL_MASK
+				| BGFX_STATE_MSAA
+				| BGFX_STATE_PT_MASK
+				;
+
+			_stencil &= packStencil(~BGFX_STENCIL_FUNC_REF_MASK, BGFX_STENCIL_MASK);
+
+			VertexDecl decl;
+			memcpy(&decl, &m_vertexDecls[_declIdx], sizeof(VertexDecl) );
+			const uint8_t* attrMask = program.m_vsh->m_attrMask;
+
+			for (uint32_t ii = 0; ii < Attrib::Count; ++ii)
+			{
+				uint8_t mask = attrMask[ii];
+				uint8_t attr = (decl.m_attributes[ii] & mask);
+				decl.m_attributes[ii] = attr == 0 ? 0xff : attr == 0xff ? 0 : attr;
+			}
+
+			bx::HashMurmur2A murmur;
+			murmur.begin();
+			murmur.add(_state);
+			murmur.add(_stencil);
+			murmur.add(program.m_vsh->m_hash);
+			murmur.add(program.m_vsh->m_attrMask, sizeof(program.m_vsh->m_attrMask) );
+			murmur.add(program.m_fsh->m_hash);
+			murmur.add(m_vertexDecls[_declIdx].m_hash);
+			murmur.add(decl.m_attributes, sizeof(decl.m_attributes) );
+			murmur.add(m_fbh.idx);
+			murmur.add(_numInstanceData);
+			const uint32_t hash = murmur.end();
+
+			ID3D12PipelineState* pso = m_pipelineStateCache.find(hash);
+
+			if (NULL != pso)
+			{
+				return pso;
+			}
+
+			D3D12_GRAPHICS_PIPELINE_STATE_DESC desc;
+			memset(&desc, 0, sizeof(desc) );
+
+			desc.pRootSignature = m_rootSignature;
+
+			desc.VS.pShaderBytecode = program.m_vsh->m_code->data;
+			desc.VS.BytecodeLength  = program.m_vsh->m_code->size;
+
+ 			const Memory* temp = alloc(program.m_fsh->m_code->size);
+ 			memset(temp->data, 0, temp->size);
+ 			bx::MemoryReader rd(program.m_fsh->m_code->data, program.m_fsh->m_code->size);
+ 			bx::StaticMemoryBlockWriter wr(temp->data, temp->size);
+
+			DxbcContext dxbc;
+			read(&rd, dxbc);
+
+			bool patchShader = true;
+			if (BX_ENABLED(BGFX_CONFIG_DEBUG) )
+			{
+				union { uint32_t offset; void* ptr; } cast = { 0 };
+				filter(dxbc.shader, dxbc.shader, patchCb0, cast.ptr);
+
+				write(&wr, dxbc);
+
+				dxbcHash(temp->data + 20, temp->size - 20, temp->data + 4);
+
+				patchShader = 0 == memcmp(program.m_fsh->m_code->data, temp->data, 16);
+				BX_CHECK(patchShader, "DXBC fragment shader patching error (ShaderHandle: %d).", program.m_fsh - m_shaders);
+
+				if (!patchShader)
+				{
+					for (uint32_t ii = 20; ii < temp->size; ii += 16)
+					{
+						if (0 != memcmp(&program.m_fsh->m_code->data[ii], &temp->data[ii], 16) )
+						{
+// 							dbgPrintfData(&program.m_fsh->m_code->data[ii], temp->size-ii, "");
+// 							dbgPrintfData(&temp->data[ii], temp->size-ii, "");
+							break;
+						}
+					}
+
+					desc.PS.pShaderBytecode = program.m_fsh->m_code->data;
+					desc.PS.BytecodeLength  = program.m_fsh->m_code->size;
+				}
+			}
+
+			if (patchShader)
+			{
+				memcpy(temp->data, program.m_fsh->m_code->data, program.m_fsh->m_code->size);
+
+				bx::seek(&wr, 0, bx::Whence::Begin);
+				union { uint32_t offset; void* ptr; } cast =
+				{
+					m_currentProgram->m_vsh->m_size/16
+				};
+				filter(dxbc.shader, dxbc.shader, patchCb0, cast.ptr);
+				write(&wr, dxbc);
+				dxbcHash(temp->data + 20, temp->size - 20, temp->data + 4);
+
+				desc.PS.pShaderBytecode = temp->data;
+				desc.PS.BytecodeLength  = temp->size;
+			}
+
+			desc.DS.pShaderBytecode = NULL;
+			desc.DS.BytecodeLength  = 0;
+
+			desc.HS.pShaderBytecode = NULL;
+			desc.HS.BytecodeLength  = 0;
+
+			desc.GS.pShaderBytecode = NULL;
+			desc.GS.BytecodeLength  = 0;
+
+			desc.StreamOutput.pSODeclaration   = NULL;
+			desc.StreamOutput.NumEntries       = 0;
+			desc.StreamOutput.pBufferStrides   = NULL;
+			desc.StreamOutput.NumStrides       = 0;
+			desc.StreamOutput.RasterizedStream = 0;
+
+			setBlendState(desc.BlendState, _state);
+			desc.SampleMask = 1;
+			setRasterizerState(desc.RasterizerState, _state);
+			setDepthStencilState(desc.DepthStencilState, _state, _stencil);
+
+			D3D12_INPUT_ELEMENT_DESC vertexElements[Attrib::Count + 1 + BGFX_CONFIG_MAX_INSTANCE_DATA_COUNT];
+			desc.InputLayout.NumElements = setInputLayout(vertexElements, m_vertexDecls[_declIdx], program, _numInstanceData);
+			desc.InputLayout.pInputElementDescs = vertexElements;
+
+			uint8_t primIndex = uint8_t( (_state&BGFX_STATE_PT_MASK) >> BGFX_STATE_PT_SHIFT);
+			desc.PrimitiveTopologyType = s_primInfo[primIndex].m_topologyType;
+
+			if (isValid(m_fbh) )
+			{
+				const FrameBufferD3D12& frameBuffer = m_frameBuffers[m_fbh.idx];
+				desc.NumRenderTargets = frameBuffer.m_num;
+
+				for (uint8_t ii = 0, num = frameBuffer.m_num; ii < num; ++ii)
+				{
+					desc.RTVFormats[ii] = m_textures[frameBuffer.m_texture[ii].idx].m_srvd.Format;
+				}
+
+				if (isValid(frameBuffer.m_depth) )
+				{
+					desc.DSVFormat = s_textureFormat[m_textures[frameBuffer.m_depth.idx].m_textureFormat].m_fmtDsv;
+				}
+				else
+				{
+					desc.DSVFormat = DXGI_FORMAT_UNKNOWN;
+				}
+			}
+			else
+			{
+				desc.NumRenderTargets = 1;
+				desc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM;
+				desc.DSVFormat     = DXGI_FORMAT_D24_UNORM_S8_UINT;
+			}
+
+			desc.SampleDesc.Count   = 1;
+			desc.SampleDesc.Quality = 0;
+
+			uint32_t length = g_callback->cacheReadSize(hash);
+			bool cached = length > 0;
+
+			void* cachedData = NULL;
+
+			if (cached)
+			{
+				cachedData = BX_ALLOC(g_allocator, length);
+				if (g_callback->cacheRead(hash, cachedData, length) )
+				{
+					BX_TRACE("Loading chached PSO (size %d).", length);
+					bx::MemoryReader reader(cachedData, length);
+
+// 					uint32_t format;
+// 					bx::read(&reader, format);
+
+					desc.CachedPSO.pCachedBlob           = reader.getDataPtr();
+					desc.CachedPSO.CachedBlobSizeInBytes = (size_t)reader.remaining();
+
+					HRESULT hr = m_device->CreateGraphicsPipelineState(&desc
+									, __uuidof(ID3D12PipelineState)
+									, (void**)&pso
+									);
+					if (FAILED(hr) )
+					{
+						BX_TRACE("Failed to load cached PSO (HRESULT 0x%08x).", hr);
+						memset(&desc.CachedPSO, 0, sizeof(desc.CachedPSO) );
+					}
+				}
+			}
+
+			if (NULL == pso)
+			{
+				DX_CHECK(m_device->CreateGraphicsPipelineState(&desc
+						, __uuidof(ID3D12PipelineState)
+						, (void**)&pso
+						) );
+			}
+			m_pipelineStateCache.add(hash, pso);
+
+			release(temp);
+
+			ID3DBlob* blob;
+			HRESULT hr = pso->GetCachedBlob(&blob);
+			if (SUCCEEDED(hr) )
+			{
+				void* data = blob->GetBufferPointer();
+				length = (uint32_t)blob->GetBufferSize();
+
+				g_callback->cacheWrite(hash, data, length);
+
+				DX_RELEASE(blob, 0);
+			}
+
+			if (NULL != cachedData)
+			{
+				BX_FREE(g_allocator, cachedData);
+			}
+
+			return pso;
+		}
+
+		// Returns the sampler block index for the given array of sampler flags.
+		// Results are cached by a hash of the flag array; on a miss a new block is
+		// allocated from m_samplerAllocator and added to the cache.
+		uint16_t getSamplerState(const uint32_t* _flags, uint32_t _num = BGFX_CONFIG_MAX_TEXTURE_SAMPLERS)
+		{
+			// The cache key is the Murmur hash of the raw flag bytes.
+			bx::HashMurmur2A hasher;
+			hasher.begin();
+			hasher.add(_flags, _num * sizeof(uint32_t) );
+			const uint32_t key = hasher.end();
+
+			const uint16_t cached = m_samplerStateCache.find(key);
+			if (UINT16_MAX != cached)
+			{
+				return cached;
+			}
+
+			// UINT16_MAX from find() is treated as "not cached yet".
+			const uint16_t created = m_samplerAllocator.alloc(_flags, _num);
+			m_samplerStateCache.add(key, created);
+
+			return created;
+		}
+
+		// Replays the uniform opcode stream stored in _constantBuffer, passing each
+		// decoded uniform to setShaderUniform. Processing stops when the
+		// UniformType::End opcode is read.
+		void commit(ConstantBuffer& _constantBuffer)
+		{
+			_constantBuffer.reset();
+
+			for (uint32_t opcode = _constantBuffer.read()
+				; UniformType::End != opcode
+				; opcode = _constantBuffer.read()
+				)
+			{
+				UniformType::Enum type;
+				uint16_t loc;
+				uint16_t num;
+				uint16_t copy;
+				ConstantBuffer::decodeOpcode(opcode, type, loc, num, copy);
+
+				// The payload either follows the opcode inline (copy != 0) or is
+				// referenced through a UniformHandle that indexes m_uniforms.
+				const char* data;
+				if (0 == copy)
+				{
+					UniformHandle handle;
+					memcpy(&handle, _constantBuffer.read(sizeof(UniformHandle) ), sizeof(UniformHandle) );
+					data = (const char*)m_uniforms[handle.idx];
+				}
+				else
+				{
+					data = _constantBuffer.read(g_uniformTypeSize[type]*num);
+				}
+
+				switch ( (int32_t)type)
+				{
+				case UniformType::Mat3:
+				case UniformType::Mat3|BGFX_UNIFORM_FRAGMENTBIT:
+					{
+						// Every 3x3 matrix (9 floats) is widened to three rows of
+						// four floats, with the fourth component set to zero,
+						// and uploaded as 3 registers.
+						const float* src = (const float*)data;
+						const uint32_t numMatrices = num/3;
+
+						for (uint32_t ii = 0; ii < numMatrices; ++ii)
+						{
+							Matrix4 mtx;
+							for (uint32_t row = 0; row < 3; ++row)
+							{
+								mtx.un.val[row*4 + 0] = src[row*3 + 0];
+								mtx.un.val[row*4 + 1] = src[row*3 + 1];
+								mtx.un.val[row*4 + 2] = src[row*3 + 2];
+								mtx.un.val[row*4 + 3] = 0.0f;
+							}
+							setShaderUniform(type, loc, &mtx.un.val[0], 3);
+
+							loc += 3*16;
+							src += 9;
+						}
+					}
+					break;
+
+				// These types are forwarded unchanged, for both the plain and
+				// the fragment-bit variants of the type id.
+				case UniformType::Int1:
+				case UniformType::Int1|BGFX_UNIFORM_FRAGMENTBIT:
+				case UniformType::Vec4:
+				case UniformType::Vec4|BGFX_UNIFORM_FRAGMENTBIT:
+				case UniformType::Mat4:
+				case UniformType::Mat4|BGFX_UNIFORM_FRAGMENTBIT:
+					setShaderUniform(type, loc, data, num);
+					break;
+
+				case UniformType::End:
+					break;
+
+				default:
+					BX_TRACE("%4d: INVALID 0x%08x, t %d, l %d, n %d, c %d", _constantBuffer.getPos(), opcode, type, loc, num, copy);
+					break;
+				}
+			}
+		}
+
+		// Clears the current render target according to _clear.
+		//
+		// When a frame buffer is bound (m_fbh is valid) the clear is delegated to
+		// that frame buffer. Otherwise the back buffer color and depth/stencil views
+		// referenced by m_currentColor / m_currentDepthStencil are cleared directly;
+		// _rect and _num are passed to the D3D12 clear calls as the rect array and
+		// its count.
+		void clear(const Clear& _clear, const float _palette[][4], const D3D12_RECT* _rect = NULL, uint32_t _num = 0)
+		{
+			if (isValid(m_fbh) )
+			{
+				m_frameBuffers[m_fbh.idx].clear(m_commandList, _clear, _palette);
+				return;
+			}
+
+			const bool wantColor = 0 != (BGFX_CLEAR_COLOR & _clear.m_flags);
+			if (NULL != m_currentColor
+			&&  wantColor)
+			{
+				float rgba[4];
+				const float* color = NULL;
+
+				if (0 != (BGFX_CLEAR_COLOR_USE_PALETTE & _clear.m_flags) )
+				{
+					// Palette mode: m_index[0] selects a palette entry, and
+					// UINT8_MAX means no color clear is issued.
+					const uint8_t index = _clear.m_index[0];
+					if (UINT8_MAX != index)
+					{
+						color = _palette[index];
+					}
+				}
+				else
+				{
+					// Direct mode: m_index holds four 8-bit channels that are
+					// scaled to [0, 1].
+					for (uint32_t ii = 0; ii < 4; ++ii)
+					{
+						rgba[ii] = _clear.m_index[ii] * 1.0f / 255.0f;
+					}
+					color = rgba;
+				}
+
+				if (NULL != color)
+				{
+					m_commandList->ClearRenderTargetView(*m_currentColor
+						, color
+						, _num
+						, _rect
+						);
+				}
+			}
+
+			const bool wantDepthStencil = 0 != ( (BGFX_CLEAR_DEPTH | BGFX_CLEAR_STENCIL) & _clear.m_flags);
+			if (NULL != m_currentDepthStencil
+			&&  wantDepthStencil)
+			{
+				uint32_t dsFlags = 0;
+				if (_clear.m_flags & BGFX_CLEAR_DEPTH)
+				{
+					dsFlags |= D3D12_CLEAR_FLAG_DEPTH;
+				}
+				if (_clear.m_flags & BGFX_CLEAR_STENCIL)
+				{
+					dsFlags |= D3D12_CLEAR_FLAG_STENCIL;
+				}
+
+				m_commandList->ClearDepthStencilView(*m_currentDepthStencil
+					, D3D12_CLEAR_FLAGS(dsFlags)
+					, _clear.m_depth
+					, _clear.m_stencil
+					, _num
+					, _rect
+					);
+			}
+		}
+
+		// Clears the region _rect of the current render target.
+		//
+		// If _rect covers the whole swap chain buffer (compared against
+		// m_scd.BufferDesc) a full clear is issued; otherwise the clear is limited
+		// to the given rectangle. _clearQuad is unused by this renderer.
+		void clearQuad(ClearQuad& _clearQuad, const Rect& _rect, const Clear& _clear, const float _palette[][4])
+		{
+			BX_UNUSED(_clearQuad);
+
+			const uint32_t width  = m_scd.BufferDesc.Width;
+			const uint32_t height = m_scd.BufferDesc.Height;
+
+			const bool fullTarget = 0 == _rect.m_x
+				&& 0      == _rect.m_y
+				&& width  == _rect.m_width
+				&& height == _rect.m_height
+				;
+
+			if (fullTarget)
+			{
+				clear(_clear, _palette);
+				return;
+			}
+
+			D3D12_RECT rect;
+			rect.left   = _rect.m_x;
+			rect.top    = _rect.m_y;
+			rect.right  = _rect.m_x + _rect.m_width;
+			rect.bottom = _rect.m_y + _rect.m_height;
+
+			// The rect count must be passed explicitly: clear() defaults _num to 0
+			// and forwards it as NumRects, in which case D3D12 ignores the rect
+			// array and clears the entire view.
+			clear(_clear, _palette, &rect, 1);
+		}
+
+		// Kicks the command queue and then fetches a new command list from it.
+		// Returns the value produced by m_cmd.kick() (presumably the fence value
+		// for the submitted work).
+		uint64_t kick()
+		{
+			const uint64_t submitted = m_cmd.kick();
+
+			// Subsequent commands are recorded into the newly allocated list.
+			m_commandList = m_cmd.alloc();
+
+			return submitted;
+		}
+
+		// Kicks the command queue, calls m_cmd.finish(), and clears m_commandList.
+		// Unlike kick(), no new command list is allocated afterwards.
+		void finish()
+		{
+			m_cmd.kick();
+			m_cmd.finish();
+			m_commandList = NULL;
+		}
+
+		void* m_d3d12dll;
+		void* m_dxgidll;
+
+		D3D_DRIVER_TYPE m_driverType;
+		IDXGIAdapter* m_adapter;
+		DXGI_ADAPTER_DESC m_adapterDesc;
+		D3D12_FEATURE_DATA_ARCHITECTURE m_architecture;
+		D3D12_FEATURE_DATA_D3D12_OPTIONS m_options;
+
+		IDXGIFactory1* m_factory;
+
+		IDXGISwapChain* m_swapChain;
+		uint16_t m_lost;
+		uint16_t m_numWindows;
+		FrameBufferHandle m_windows[BGFX_CONFIG_MAX_FRAME_BUFFERS];
+
+		ID3D12Device* m_device;
+		ID3D12InfoQueue* m_infoQueue;
+
+		ID3D12DescriptorHeap* m_rtvDescriptorHeap;
+		ID3D12DescriptorHeap* m_dsvDescriptorHeap;
+		D3D12_CPU_DESCRIPTOR_HANDLE m_rtvHandle;
+		D3D12_CPU_DESCRIPTOR_HANDLE m_dsvHandle;
+		D3D12_CPU_DESCRIPTOR_HANDLE* m_currentColor;
+		D3D12_CPU_DESCRIPTOR_HANDLE* m_currentDepthStencil;
+		ID3D12Resource* m_backBufferColor[4];
+		uint64_t m_backBufferColorFence[4];
+		ID3D12Resource* m_backBufferDepthStencil;
+
+		ScratchBufferD3D12 m_scratchBuffer[4];
+		DescriptorAllocator m_samplerAllocator;
+
+		ID3D12RootSignature* m_rootSignature;
+
+		CommandQueue m_cmd;
+		ID3D12GraphicsCommandList* m_commandList;
+
+		Resolution m_resolution;
+		bool m_wireframe;
+
+		DXGI_SWAP_CHAIN_DESC m_scd;
+		uint32_t m_flags;
+
+		BufferD3D12 m_indexBuffers[BGFX_CONFIG_MAX_INDEX_BUFFERS];
+		VertexBufferD3D12 m_vertexBuffers[BGFX_CONFIG_MAX_VERTEX_BUFFERS];
+		ShaderD3D12 m_shaders[BGFX_CONFIG_MAX_SHADERS];
+		ProgramD3D12 m_program[BGFX_CONFIG_MAX_PROGRAMS];
+		TextureD3D12 m_textures[BGFX_CONFIG_MAX_TEXTURES];
+		VertexDecl m_vertexDecls[BGFX_CONFIG_MAX_VERTEX_DECLS];
+		FrameBufferD3D12 m_frameBuffers[BGFX_CONFIG_MAX_FRAME_BUFFERS];
+		void* m_uniforms[BGFX_CONFIG_MAX_UNIFORMS];
+		Matrix4 m_predefinedUniforms[PredefinedUniform::Count];
+		UniformRegistry m_uniformReg;
+
+		StateCacheT<ID3D12PipelineState> m_pipelineStateCache;
+		StateCache m_samplerStateCache;
+
+		TextVideoMem m_textVideoMem;
+
+		ProgramD3D12* m_currentProgram;
+		uint8_t m_fsScratch[64<<10];
+		uint8_t m_vsScratch[64<<10];
+		uint32_t m_fsChanges;
+		uint32_t m_vsChanges;
+
+		FrameBufferHandle m_fbh;
+		uint32_t m_frame;
+		uint32_t m_backBufferColorIdx;
+		bool m_rtMsaa;
+	};
+
+	// D3D12 renderer context instance: assigned in rendererCreate() and reset to
+	// NULL in rendererDestroy(). Helper types in this file reach the device and
+	// command list through it.
+	static RendererContextD3D12* s_renderD3D12;
+
+	// Allocates the D3D12 renderer context, publishes it through s_renderD3D12,
+	// initializes it and returns it.
+	RendererContextI* rendererCreate()
+	{
+		RendererContextD3D12* context = BX_NEW(g_allocator, RendererContextD3D12);
+
+		// The global is assigned before init() runs, as in the original order.
+		s_renderD3D12 = context;
+		context->init();
+
+		return context;
+	}
+
+	// Deletes the renderer context created by rendererCreate() and clears the
+	// global pointer.
+	void rendererDestroy()
+	{
+		RendererContextD3D12* context = s_renderD3D12;
+		BX_DELETE(g_allocator, context);
+		s_renderD3D12 = NULL;
+	}
+
+	// Creates the scratch buffer: a shader-visible CBV/SRV/UAV descriptor heap
+	// with _maxDescriptors entries plus an upload resource that is mapped once
+	// here and stays mapped until destroy().
+	//
+	// _size is stored in m_size; the upload resource itself is sized as
+	// _maxDescriptors * 1024 bytes.
+	void ScratchBufferD3D12::create(uint32_t _size, uint32_t _maxDescriptors)
+	{
+		m_size = _size;
+
+		ID3D12Device* device = s_renderD3D12->m_device;
+
+		m_incrementSize = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
+
+		D3D12_DESCRIPTOR_HEAP_DESC desc;
+		desc.NumDescriptors = _maxDescriptors;
+		desc.Type     = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
+		desc.Flags    = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
+		desc.NodeMask = 0;
+		DX_CHECK(device->CreateDescriptorHeap(&desc
+				, __uuidof(ID3D12DescriptorHeap)
+				, (void**)&m_heap
+				) );
+
+		m_upload = createCommittedResource(device, HeapProperty::Upload, desc.NumDescriptors * 1024);
+
+		// Check the mapping result like the heap creation above; if Map fails,
+		// m_data is not a valid pointer and later writes through it would crash.
+		DX_CHECK(m_upload->Map(0, NULL, (void**)&m_data) );
+
+		reset(m_gpuHandle);
+	}
+
+	// Unmaps the upload resource and releases both the upload resource and the
+	// descriptor heap created in create().
+	void ScratchBufferD3D12::destroy()
+	{
+		m_upload->Unmap(0, NULL);
+
+		DX_RELEASE(m_upload, 0);
+		DX_RELEASE(m_heap, 0);
+	}
+
+	// Rewinds the scratch buffer: the upload write position returns to 0 and both
+	// descriptor cursors return to the start of the heap. The heap's starting GPU
+	// handle is also written to gpuHandle.
+	void ScratchBufferD3D12::reset(D3D12_GPU_DESCRIPTOR_HANDLE& gpuHandle)
+	{
+		m_pos = 0;
+
+		m_cpuHandle = m_heap->GetCPUDescriptorHandleForHeapStart();
+
+		const D3D12_GPU_DESCRIPTOR_HANDLE heapStart = m_heap->GetGPUDescriptorHandleForHeapStart();
+		m_gpuHandle = heapStart;
+		gpuHandle   = heapStart;
+	}
+
+	// Reserves _size bytes (rounded up to 256) in the mapped upload buffer,
+	// creates a constant buffer view for that range in the next descriptor slot,
+	// and returns the CPU pointer to write the constants to. The GPU handle of
+	// the slot is written to gpuHandle.
+	void* ScratchBufferD3D12::alloc(D3D12_GPU_DESCRIPTOR_HANDLE& gpuHandle, uint32_t _size)
+	{
+		// D3D12 requires a CBV's SizeInBytes to be a multiple of 256. The write
+		// position already advances by the aligned size, so describing the view
+		// with the aligned size stays inside the range reserved here.
+		const uint32_t alignedSize = BX_ALIGN_256(_size);
+
+		D3D12_CONSTANT_BUFFER_VIEW_DESC desc;
+		desc.BufferLocation = m_upload->GetGPUVirtualAddress() + m_pos;
+		desc.SizeInBytes    = alignedSize;
+
+		void* data = &m_data[m_pos];
+
+		m_pos += alignedSize;
+
+		ID3D12Device* device = s_renderD3D12->m_device;
+		device->CreateConstantBufferView(&desc
+			, m_cpuHandle
+			);
+		m_cpuHandle.ptr += m_incrementSize;
+
+		gpuHandle = m_gpuHandle;
+		m_gpuHandle.ptr += m_incrementSize;
+
+		return data;
+	}
+
+	// Creates a shader resource view for _texture in the next descriptor slot and
+	// writes that slot's GPU handle to gpuHandle.
+	void ScratchBufferD3D12::alloc(D3D12_GPU_DESCRIPTOR_HANDLE& gpuHandle, TextureD3D12& _texture)
+	{
+		// Take the current slot, then advance both cursors by one descriptor.
+		const D3D12_CPU_DESCRIPTOR_HANDLE slot = m_cpuHandle;
+		gpuHandle = m_gpuHandle;
+
+		m_cpuHandle.ptr += m_incrementSize;
+		m_gpuHandle.ptr += m_incrementSize;
+
+		s_renderD3D12->m_device->CreateShaderResourceView(_texture.m_ptr
+			, &_texture.m_srvd
+			, slot
+			);
+	}
+
+	// Creates an unordered access view for _texture (no counter resource) in the
+	// next descriptor slot and writes that slot's GPU handle to gpuHandle.
+	void ScratchBufferD3D12::allocUav(D3D12_GPU_DESCRIPTOR_HANDLE& gpuHandle, TextureD3D12& _texture)
+	{
+		// Take the current slot, then advance both cursors by one descriptor.
+		const D3D12_CPU_DESCRIPTOR_HANDLE slot = m_cpuHandle;
+		gpuHandle = m_gpuHandle;
+
+		m_cpuHandle.ptr += m_incrementSize;
+		m_gpuHandle.ptr += m_incrementSize;
+
+		s_renderD3D12->m_device->CreateUnorderedAccessView(_texture.m_ptr
+			, NULL
+			, &_texture.m_uavd
+			, slot
+			);
+	}
+
+	// Creates a shader resource view for _buffer in the next descriptor slot and
+	// writes that slot's GPU handle to gpuHandle.
+	void ScratchBufferD3D12::alloc(D3D12_GPU_DESCRIPTOR_HANDLE& gpuHandle, BufferD3D12& _buffer)
+	{
+		// Take the current slot, then advance both cursors by one descriptor.
+		const D3D12_CPU_DESCRIPTOR_HANDLE slot = m_cpuHandle;
+		gpuHandle = m_gpuHandle;
+
+		m_cpuHandle.ptr += m_incrementSize;
+		m_gpuHandle.ptr += m_incrementSize;
+
+		s_renderD3D12->m_device->CreateShaderResourceView(_buffer.m_ptr
+			, &_buffer.m_srvd
+			, slot
+			);
+	}
+
+	// Creates an unordered access view for _buffer (no counter resource) in the
+	// next descriptor slot and writes that slot's GPU handle to gpuHandle.
+	void ScratchBufferD3D12::allocUav(D3D12_GPU_DESCRIPTOR_HANDLE& gpuHandle, BufferD3D12& _buffer)
+	{
+		// Take the current slot, then advance both cursors by one descriptor.
+		const D3D12_CPU_DESCRIPTOR_HANDLE slot = m_cpuHandle;
+		gpuHandle = m_gpuHandle;
+
+		m_cpuHandle.ptr += m_incrementSize;
+		m_gpuHandle.ptr += m_incrementSize;
+
+		s_renderD3D12->m_device->CreateUnorderedAccessView(_buffer.m_ptr
+			, NULL
+			, &_buffer.m_uavd
+			, slot
+			);
+	}
+
+	// Creates a shader-visible descriptor heap of the given type with
+	// _maxDescriptors entries, plus a handle allocator created with the same
+	// count. _numDescriptorsPerBlock is the number of consecutive descriptors
+	// that one allocated index addresses in the sampler alloc() and in get().
+	void DescriptorAllocator::create(D3D12_DESCRIPTOR_HEAP_TYPE _type, uint32_t _maxDescriptors, uint16_t _numDescriptorsPerBlock)
+	{
+		m_numDescriptorsPerBlock = _numDescriptorsPerBlock;
+		m_handleAlloc = bx::createHandleAlloc(g_allocator, _maxDescriptors);
+
+		ID3D12Device* device = s_renderD3D12->m_device;
+		m_incrementSize = device->GetDescriptorHandleIncrementSize(_type);
+
+		D3D12_DESCRIPTOR_HEAP_DESC heapDesc;
+		heapDesc.Type           = _type;
+		heapDesc.NumDescriptors = _maxDescriptors;
+		heapDesc.Flags          = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
+		heapDesc.NodeMask       = 0;
+		DX_CHECK(device->CreateDescriptorHeap(&heapDesc
+				, __uuidof(ID3D12DescriptorHeap)
+				, (void**)&m_heap
+				) );
+
+		// Cache the heap start handles; individual slots are addressed relative
+		// to these.
+		m_gpuHandle = m_heap->GetGPUDescriptorHandleForHeapStart();
+		m_cpuHandle = m_heap->GetCPUDescriptorHandleForHeapStart();
+	}
+
+	// Destroys the handle allocator and releases the descriptor heap created in
+	// create().
+	void DescriptorAllocator::destroy()
+	{
+		bx::destroyHandleAlloc(g_allocator, m_handleAlloc);
+
+		DX_RELEASE(m_heap, 0);
+	}
+
+	// Allocates one index and creates a shader resource view for _ptr in the heap
+	// slot at that index. Returns the allocated index.
+	//
+	// NOTE(review): the slot offset here is idx * m_incrementSize, whereas get()
+	// and the sampler alloc() also multiply by m_numDescriptorsPerBlock; confirm
+	// this overload is only used with one descriptor per block.
+	uint16_t DescriptorAllocator::alloc(ID3D12Resource* _ptr, const D3D12_SHADER_RESOURCE_VIEW_DESC* _desc)
+	{
+		const uint16_t idx = m_handleAlloc->alloc();
+
+		D3D12_CPU_DESCRIPTOR_HANDLE slot;
+		slot.ptr = m_cpuHandle.ptr + idx * m_incrementSize;
+
+		s_renderD3D12->m_device->CreateShaderResourceView(_ptr
+			, _desc
+			, slot
+			);
+
+		return idx;
+	}
+
+	// Allocates one block index and creates _num samplers, one per entry of
+	// _flags, in consecutive slots of that block. Returns the block index.
+	//
+	// Filter, address modes and comparison function are decoded from the
+	// BGFX_TEXTURE_* bit fields of each flags value.
+	uint16_t DescriptorAllocator::alloc(const uint32_t* _flags, uint32_t _num)
+	{
+		uint16_t idx = m_handleAlloc->alloc();
+
+		ID3D12Device* device = s_renderD3D12->m_device;
+
+		for (uint32_t ii = 0; ii < _num; ++ii)
+		{
+			uint32_t flags = _flags[ii];
+
+			const uint32_t cmpFunc   = (flags&BGFX_TEXTURE_COMPARE_MASK)>>BGFX_TEXTURE_COMPARE_SHIFT;
+			const uint8_t  minFilter = s_textureFilter[0][(flags&BGFX_TEXTURE_MIN_MASK)>>BGFX_TEXTURE_MIN_SHIFT];
+			const uint8_t  magFilter = s_textureFilter[1][(flags&BGFX_TEXTURE_MAG_MASK)>>BGFX_TEXTURE_MAG_SHIFT];
+			const uint8_t  mipFilter = s_textureFilter[2][(flags&BGFX_TEXTURE_MIP_MASK)>>BGFX_TEXTURE_MIP_SHIFT];
+			// A non-zero compare function switches the filter to its comparison variant.
+			const uint8_t  filter    = 0 == cmpFunc ? 0 : D3D12_FILTER_COMPARISON_MIN_MAG_MIP_POINT;
+
+			D3D12_SAMPLER_DESC sd;
+			sd.Filter   = (D3D12_FILTER)(filter|minFilter|magFilter|mipFilter);
+			sd.AddressU = s_textureAddress[(flags&BGFX_TEXTURE_U_MASK)>>BGFX_TEXTURE_U_SHIFT];
+			sd.AddressV = s_textureAddress[(flags&BGFX_TEXTURE_V_MASK)>>BGFX_TEXTURE_V_SHIFT];
+			sd.AddressW = s_textureAddress[(flags&BGFX_TEXTURE_W_MASK)>>BGFX_TEXTURE_W_SHIFT];
+			sd.MinLOD   = 0;
+			sd.MaxLOD   = D3D12_FLOAT32_MAX;
+			sd.MipLODBias     = 0.0f;
+			sd.MaxAnisotropy  = 1; //m_maxAnisotropy;
+			sd.ComparisonFunc = 0 == cmpFunc ? D3D12_COMPARISON_FUNC_NEVER : s_cmpFunc[cmpFunc];
+
+			// Zero the border color so that no indeterminate stack data is
+			// passed to CreateSampler.
+			sd.BorderColor[0] = 0.0f;
+			sd.BorderColor[1] = 0.0f;
+			sd.BorderColor[2] = 0.0f;
+			sd.BorderColor[3] = 0.0f;
+
+			// Slot ii of block idx; each block spans m_numDescriptorsPerBlock descriptors.
+			D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle =
+			{
+				m_cpuHandle.ptr + (idx * m_numDescriptorsPerBlock + ii) * m_incrementSize
+			};
+
+			device->CreateSampler(&sd, cpuHandle);
+		}
+
+		return idx;
+	}
+
+	// Releases index _idx back to m_handleAlloc. The descriptors written at that
+	// index are not touched here.
+	void DescriptorAllocator::free(uint16_t _idx)
+	{
+		m_handleAlloc->free(_idx);
+	}
+
+	// Returns the GPU descriptor handle of the first descriptor in block _idx,
+	// where each block spans m_numDescriptorsPerBlock descriptors.
+	D3D12_GPU_DESCRIPTOR_HANDLE DescriptorAllocator::get(uint16_t _idx)
+	{
+		D3D12_GPU_DESCRIPTOR_HANDLE blockStart;
+		blockStart.ptr = m_gpuHandle.ptr + _idx * m_numDescriptorsPerBlock * m_incrementSize;
+		return blockStart;
+	}
+
+	// One row of the compute buffer format table (s_uavFormat).
+	struct UavFormat
+	{
+		DXGI_FORMAT format[3]; // DXGI format per compute element type (indexed by type - 1 in BufferD3D12::create).
+		uint32_t    stride;    // Size of one element in bytes; used to derive NumElements from the buffer size.
+	};
+
+	// Compute buffer format table. Rows are indexed by the
+	// BGFX_BUFFER_COMPUTE_FORMAT_* field of the buffer flags (row 0 means no
+	// compute format was requested); columns are indexed by the
+	// BGFX_BUFFER_COMPUTE_TYPE_* field minus one.
+	//
+	// NOTE(review): the column header below labels column 0 as UINT and column 1
+	// as INT, but the entries are *_SINT in column 0 and *_UINT in column 1.
+	// Confirm against the BGFX_BUFFER_COMPUTE_TYPE_* values which of the two is
+	// wrong.
+	static const UavFormat s_uavFormat[] =
+	{	//  BGFX_BUFFER_COMPUTE_TYPE_UINT, BGFX_BUFFER_COMPUTE_TYPE_INT,   BGFX_BUFFER_COMPUTE_TYPE_FLOAT
+		{ { DXGI_FORMAT_UNKNOWN,           DXGI_FORMAT_UNKNOWN,            DXGI_FORMAT_UNKNOWN            },  0 }, // ignored
+		{ { DXGI_FORMAT_R8_SINT,           DXGI_FORMAT_R8_UINT,            DXGI_FORMAT_UNKNOWN            },  1 }, // BGFX_BUFFER_COMPUTE_FORMAT_8x1
+		{ { DXGI_FORMAT_R8G8_SINT,         DXGI_FORMAT_R8G8_UINT,          DXGI_FORMAT_UNKNOWN            },  2 }, // BGFX_BUFFER_COMPUTE_FORMAT_8x2
+		{ { DXGI_FORMAT_R8G8B8A8_SINT,     DXGI_FORMAT_R8G8B8A8_UINT,      DXGI_FORMAT_UNKNOWN            },  4 }, // BGFX_BUFFER_COMPUTE_FORMAT_8x4
+		{ { DXGI_FORMAT_R16_SINT,          DXGI_FORMAT_R16_UINT,           DXGI_FORMAT_R16_FLOAT          },  2 }, // BGFX_BUFFER_COMPUTE_FORMAT_16x1
+		{ { DXGI_FORMAT_R16G16_SINT,       DXGI_FORMAT_R16G16_UINT,        DXGI_FORMAT_R16G16_FLOAT       },  4 }, // BGFX_BUFFER_COMPUTE_FORMAT_16x2
+		{ { DXGI_FORMAT_R16G16B16A16_SINT, DXGI_FORMAT_R16G16B16A16_UINT,  DXGI_FORMAT_R16G16B16A16_FLOAT },  8 }, // BGFX_BUFFER_COMPUTE_FORMAT_16x4
+		{ { DXGI_FORMAT_R32_SINT,          DXGI_FORMAT_R32_UINT,           DXGI_FORMAT_R32_FLOAT          },  4 }, // BGFX_BUFFER_COMPUTE_FORMAT_32x1
+		{ { DXGI_FORMAT_R32G32_SINT,       DXGI_FORMAT_R32G32_UINT,        DXGI_FORMAT_R32G32_FLOAT       },  8 }, // BGFX_BUFFER_COMPUTE_FORMAT_32x2
+		{ { DXGI_FORMAT_R32G32B32A32_SINT, DXGI_FORMAT_R32G32B32A32_UINT,  DXGI_FORMAT_R32G32B32A32_FLOAT }, 16 }, // BGFX_BUFFER_COMPUTE_FORMAT_32x4
+	};
+
+	// Creates the GPU buffer of _size bytes, fills in its SRV/UAV descriptions
+	// and, when the buffer is not dynamic, records the upload of _data on the
+	// renderer's current command list.
+	//
+	// _flags  BGFX_BUFFER_* flags; select UAV access, indirect-draw use, the
+	//         compute format and the index width.
+	// _vertex true for vertex buffers; only affects the view format chosen when
+	//         no compute format is requested.
+	//
+	// The buffer is treated as dynamic when no initial data is given or when
+	// UAV access is needed; in the latter case no staging resource is created.
+	void BufferD3D12::create(uint32_t _size, void* _data, uint16_t _flags, bool _vertex)
+	{
+		m_size  = _size;
+		m_flags = _flags;
+
+		const bool drawIndirect = 0 != (_flags & BGFX_BUFFER_DRAW_INDIRECT);
+		const bool needUav      = 0 != (_flags & (BGFX_BUFFER_COMPUTE_WRITE|BGFX_BUFFER_DRAW_INDIRECT));
+		m_dynamic = needUav || NULL == _data;
+
+		// Choose the element format and stride used by the buffer views.
+		const uint32_t uavFormat = (_flags & BGFX_BUFFER_COMPUTE_FORMAT_MASK) >> BGFX_BUFFER_COMPUTE_FORMAT_SHIFT;
+
+		DXGI_FORMAT format;
+		uint32_t    stride;
+
+		if (drawIndirect)
+		{
+			format = DXGI_FORMAT_R32G32B32A32_UINT;
+			stride = 16;
+		}
+		else if (0 != uavFormat)
+		{
+			// Explicit compute format: look it up by format row and type column.
+			const uint32_t uavType = bx::uint32_satsub( (_flags & BGFX_BUFFER_COMPUTE_TYPE_MASK) >> BGFX_BUFFER_COMPUTE_TYPE_SHIFT, 1);
+			format = s_uavFormat[uavFormat].format[uavType];
+			stride = s_uavFormat[uavFormat].stride;
+		}
+		else if (_vertex)
+		{
+			format = DXGI_FORMAT_R32G32B32A32_FLOAT;
+			stride = 16;
+		}
+		else if (0 != (_flags & BGFX_BUFFER_INDEX32) )
+		{
+			format = DXGI_FORMAT_R32_UINT;
+			stride = 4;
+		}
+		else
+		{
+			format = DXGI_FORMAT_R16_UINT;
+			stride = 2;
+		}
+
+		const uint32_t numElements = m_size / stride;
+
+		m_srvd.Format                      = format;
+		m_srvd.ViewDimension               = D3D12_SRV_DIMENSION_BUFFER;
+		m_srvd.Shader4ComponentMapping     = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
+		m_srvd.Buffer.FirstElement         = 0;
+		m_srvd.Buffer.NumElements          = numElements;
+		m_srvd.Buffer.StructureByteStride  = 0;
+		m_srvd.Buffer.Flags                = D3D12_BUFFER_SRV_FLAG_NONE;
+
+		m_uavd.Format                      = format;
+		m_uavd.ViewDimension               = D3D12_UAV_DIMENSION_BUFFER;
+		m_uavd.Buffer.FirstElement         = 0;
+		m_uavd.Buffer.NumElements          = numElements;
+		m_uavd.Buffer.StructureByteStride  = 0;
+		m_uavd.Buffer.CounterOffsetInBytes = 0;
+		m_uavd.Buffer.Flags                = D3D12_BUFFER_UAV_FLAG_NONE;
+
+		D3D12_RESOURCE_FLAGS resourceFlags = D3D12_RESOURCE_FLAG_NONE;
+		if (needUav)
+		{
+			resourceFlags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
+		}
+
+		ID3D12Device* device = s_renderD3D12->m_device;
+		ID3D12GraphicsCommandList* commandList = s_renderD3D12->m_commandList;
+
+		m_ptr = createCommittedResource(device, HeapProperty::Default, _size, resourceFlags);
+
+		if (!needUav)
+		{
+			m_staging = createCommittedResource(device, HeapProperty::Upload,  _size);
+		}
+
+		if (!m_dynamic)
+		{
+			// Static buffer: copy the initial contents through the staging resource.
+			setState(commandList, D3D12_RESOURCE_STATE_COPY_DEST);
+
+			D3D12_SUBRESOURCE_DATA initialData;
+			initialData.pData      = _data;
+			initialData.RowPitch   = _size;
+			initialData.SlicePitch = initialData.RowPitch;
+
+			UpdateSubresources<1>(commandList
+				, m_ptr
+				, m_staging
+				, 0
+				, 0
+				, 1
+				, &initialData
+				);
+		}
+
+		// Both the dynamic and the static path leave the buffer in GENERIC_READ.
+		setState(commandList, D3D12_RESOURCE_STATE_GENERIC_READ);
+	}
+
+	// Records an upload of _size bytes from _data into the buffer on
+	// _commandList, going through the staging resource. The buffer is moved to
+	// COPY_DEST for the copy and back to GENERIC_READ afterwards.
+	//
+	// The offset and discard arguments are currently unused.
+	void BufferD3D12::update(ID3D12GraphicsCommandList* _commandList, uint32_t /*_offset*/, uint32_t _size, void* _data, bool /*_discard*/)
+	{
+		D3D12_SUBRESOURCE_DATA source;
+		source.pData      = _data;
+		source.RowPitch   = _size;
+		source.SlicePitch = source.RowPitch;
+
+		setState(_commandList, D3D12_RESOURCE_STATE_COPY_DEST);
+
+		UpdateSubresources<1>(_commandList
+			, m_ptr
+			, m_staging
+			, 0
+			, 0
+			, 1
+			, &source
+			);
+
+		setState(_commandList, D3D12_RESOURCE_STATE_GENERIC_READ);
+	}
+
+	// Transitions the buffer to _state by recording a resource barrier on
+	// _commandList. Does nothing if the tracked state already equals _state.
+	void BufferD3D12::setState(ID3D12GraphicsCommandList* _commandList, D3D12_RESOURCE_STATES _state)
+	{
+		if (m_state == _state)
+		{
+			return;
+		}
+
+		const D3D12_RESOURCE_STATES previous = m_state;
+		setResourceBarrier(_commandList
+			, m_ptr
+			, previous
+			, _state
+			);
+
+		m_state = _state;
+	}
+
+	// Creates the underlying buffer as a vertex buffer and stores the handle of
+	// the vertex declaration associated with it.
+	void VertexBufferD3D12::create(uint32_t _size, void* _data, VertexDeclHandle _declHandle, uint16_t _flags)
+	{
+		const bool isVertexBuffer = true;
+		BufferD3D12::create(_size, _data, _flags, isVertexBuffer);
+
+		m_decl = _declHandle;
+	}
+
+	// Parses a bgfx shader blob from _mem and initializes this shader from it.
+	//
+	// The blob is read sequentially in this order: magic, I/O hash, uniform
+	// count, the uniform records, shader byte code size, the byte code itself,
+	// the attribute id list, and finally a value stored in m_size.
+	//
+	// Predefined uniforms are recorded in m_predefined; uniforms found in the
+	// renderer's uniform registry are written to m_constantBuffer, which is
+	// created on first use. All other uniforms are only traced.
+	void ShaderD3D12::create(const Memory* _mem)
+	{
+		bx::MemoryReader reader(_mem->data, _mem->size);
+
+		uint32_t magic;
+		bx::read(&reader, magic);
+
+		// Only compute, fragment and vertex shader chunks are accepted.
+		switch (magic)
+		{
+		case BGFX_CHUNK_MAGIC_CSH:
+		case BGFX_CHUNK_MAGIC_FSH:
+		case BGFX_CHUNK_MAGIC_VSH:
+			break;
+
+		default:
+			BGFX_FATAL(false, Fatal::InvalidShader, "Unknown shader format %x.", magic);
+			break;
+		}
+
+		bool fragment = BGFX_CHUNK_MAGIC_FSH == magic;
+
+		uint32_t iohash;
+		bx::read(&reader, iohash);
+
+		uint16_t count;
+		bx::read(&reader, count);
+
+		m_numPredefined = 0;
+		m_numUniforms = count;
+
+		BX_TRACE("%s Shader consts %d"
+			, BGFX_CHUNK_MAGIC_FSH == magic ? "Fragment" : BGFX_CHUNK_MAGIC_VSH == magic ? "Vertex" : "Compute"
+			, count
+			);
+
+		// For fragment shaders this bit is OR-ed into every uniform type written below.
+		uint8_t fragmentBit = fragment ? BGFX_UNIFORM_FRAGMENTBIT : 0;
+
+		if (0 < count)
+		{
+			for (uint32_t ii = 0; ii < count; ++ii)
+			{
+				// Uniform record: name length (u8), name bytes, type (u8),
+				// num (u8), register index (u16), register count (u16).
+				uint8_t nameSize;
+				bx::read(&reader, nameSize);
+
+				// nameSize is at most 255, so the terminator always fits in name[256].
+				char name[256];
+				bx::read(&reader, &name, nameSize);
+				name[nameSize] = '\0';
+
+				uint8_t type;
+				bx::read(&reader, type);
+
+				uint8_t num;
+				bx::read(&reader, num);
+
+				uint16_t regIndex;
+				bx::read(&reader, regIndex);
+
+				uint16_t regCount;
+				bx::read(&reader, regCount);
+
+				const char* kind = "invalid";
+
+				PredefinedUniform::Enum predefined = nameToPredefinedUniformEnum(name);
+				if (PredefinedUniform::Count != predefined)
+				{
+					// NOTE(review): m_numPredefined is not bounds-checked against
+					// the capacity of m_predefined here - confirm the limit.
+					kind = "predefined";
+					m_predefined[m_numPredefined].m_loc   = regIndex;
+					m_predefined[m_numPredefined].m_count = regCount;
+					m_predefined[m_numPredefined].m_type  = predefined|fragmentBit;
+					m_numPredefined++;
+				}
+				else
+				{
+					const UniformInfo* info = s_renderD3D12->m_uniformReg.find(name);
+
+					if (NULL != info)
+					{
+						// Create the constant buffer the first time a user uniform is found.
+						if (NULL == m_constantBuffer)
+						{
+							m_constantBuffer = ConstantBuffer::create(1024);
+						}
+
+						kind = "user";
+						m_constantBuffer->writeUniformHandle( (UniformType::Enum)(type|fragmentBit), regIndex, info->m_handle, regCount);
+					}
+				}
+
+				BX_TRACE("\t%s: %s (%s), num %2d, r.index %3d, r.count %2d"
+					, kind
+					, name
+					, getUniformTypeName(UniformType::Enum(type&~BGFX_UNIFORM_FRAGMENTBIT) )
+					, num
+					, regIndex
+					, regCount
+					);
+				BX_UNUSED(kind);
+			}
+
+			if (NULL != m_constantBuffer)
+			{
+				m_constantBuffer->finish();
+			}
+		}
+
+		uint16_t shaderSize;
+		bx::read(&reader, shaderSize);
+
+		// The byte code is referenced in place and then skipped; one extra byte
+		// after it is skipped as well (presumably a terminator - confirm
+		// against the shader file format).
+		const DWORD* code = (const DWORD*)reader.getDataPtr();
+		bx::skip(&reader, shaderSize+1);
+
+		m_code = copy(code, shaderSize);
+
+		uint8_t numAttrs;
+		bx::read(&reader, numAttrs);
+
+		memset(m_attrMask, 0, sizeof(m_attrMask) );
+
+		// Mark every attribute the shader declares; ids that do not map to a
+		// known attribute are ignored.
+		for (uint32_t ii = 0; ii < numAttrs; ++ii)
+		{
+			uint16_t id;
+			bx::read(&reader, id);
+
+			Attrib::Enum attr = idToAttrib(id);
+
+			if (Attrib::Count != attr)
+			{
+				m_attrMask[attr] = 0xff;
+			}
+		}
+
+		// Shader hash: I/O hash, byte code, attribute count and attribute mask.
+		// NOTE(review): only the first numAttrs bytes of m_attrMask are hashed,
+		// although the mask is indexed by attribute enum - confirm this is intended.
+		bx::HashMurmur2A murmur;
+		murmur.begin();
+		murmur.add(iohash);
+		murmur.add(code, shaderSize);
+		murmur.add(numAttrs);
+		murmur.add(m_attrMask, numAttrs);
+		m_hash = murmur.end();
+
+		// Trailing value of the blob, stored in m_size.
+		bx::read(&reader, m_size);
+	}
+
+	void TextureD3D12::create(const Memory* _mem, uint32_t _flags, uint8_t _skip)
+	{
+		ImageContainer imageContainer;
+
+		if (imageParse(imageContainer, _mem->data, _mem->size) )
+		{
+			uint8_t numMips = imageContainer.m_numMips;
+			const uint32_t startLod = bx::uint32_min(_skip, numMips-1);
+			numMips -= startLod;
+			const ImageBlockInfo& blockInfo = getBlockInfo(TextureFormat::Enum(imageContainer.m_format) );
+			const uint32_t textureWidth  = bx::uint32_max(blockInfo.blockWidth,  imageContainer.m_width >>startLod);
+			const uint32_t textureHeight = bx::uint32_max(blockInfo.blockHeight, imageContainer.m_height>>startLod);
+
+			m_flags = _flags;
+			m_requestedFormat = (uint8_t)imageContainer.m_format;
+			m_textureFormat   = (uint8_t)imageContainer.m_format;
+
+			const TextureFormatInfo& tfi = s_textureFormat[m_requestedFormat];
+			const bool convert = DXGI_FORMAT_UNKNOWN == tfi.m_fmt;
+
+			uint8_t bpp = getBitsPerPixel(TextureFormat::Enum(m_textureFormat) );
+			if (convert)
+			{
+				m_textureFormat = (uint8_t)TextureFormat::BGRA8;
+				bpp = 32;
+			}
+
+			if (imageContainer.m_cubeMap)
+			{
+				m_type = TextureCube;
+			}
+			else if (imageContainer.m_depth > 1)
+			{
+				m_type = Texture3D;
+			}
+			else
+			{
+				m_type = Texture2D;
+			}
+
+			m_numMips = numMips;
+			const uint32_t numSides = imageContainer.m_cubeMap ? 6 : 1;
+
+			uint32_t numSrd = numMips*numSides;
+			D3D12_SUBRESOURCE_DATA* srd = (D3D12_SUBRESOURCE_DATA*)alloca(numSrd*sizeof(D3D12_SUBRESOURCE_DATA) );
+
+			uint32_t kk = 0;
+
+			const bool compressed = isCompressed(TextureFormat::Enum(m_textureFormat) );
+			const bool swizzle    = TextureFormat::BGRA8 == m_textureFormat && 0 != (m_flags&BGFX_TEXTURE_COMPUTE_WRITE);
+			uint32_t blockWidth   = 1;
+			uint32_t blockHeight  = 1;
+
+			if (convert && compressed)
+			{
+				blockWidth  = blockInfo.blockWidth;
+				blockHeight = blockInfo.blockHeight;
+			}
+
+			const bool bufferOnly   = 0 != (m_flags&BGFX_TEXTURE_RT_BUFFER_ONLY);
+			const bool computeWrite = 0 != (m_flags&BGFX_TEXTURE_COMPUTE_WRITE);
+			const bool renderTarget = 0 != (m_flags&BGFX_TEXTURE_RT_MASK);
+
+			BX_TRACE("Texture %3d: %s (requested: %s), %dx%d%s RT[%c], BO[%c], CW[%c]%s."
+				, this - s_renderD3D12->m_textures
+				, getName( (TextureFormat::Enum)m_textureFormat)
+				, getName( (TextureFormat::Enum)m_requestedFormat)
+				, textureWidth
+				, textureHeight
+				, imageContainer.m_cubeMap ? "x6" : ""
+				, renderTarget ? 'x' : ' '
+				, bufferOnly   ? 'x' : ' '
+				, computeWrite ? 'x' : ' '
+				, swizzle ? " (swizzle BGRA8 -> RGBA8)" : ""
+				);
+
+			uint32_t totalSize = 0;
+
+			for (uint8_t side = 0; side < numSides; ++side)
+			{
+				uint32_t width  = textureWidth;
+				uint32_t height = textureHeight;
+				uint32_t depth  = imageContainer.m_depth;
+
+				for (uint32_t lod = 0; lod < numMips; ++lod)
+				{
+					width  = bx::uint32_max(blockWidth,  width);
+					height = bx::uint32_max(blockHeight, height);
+					depth  = bx::uint32_max(1, depth);
+
+					ImageMip mip;
+					if (imageGetRawData(imageContainer, side, lod+startLod, _mem->data, _mem->size, mip) )
+					{
+						if (convert)
+						{
+							const uint32_t pitch = bx::strideAlign(width*bpp / 8,  D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
+							const uint32_t slice = bx::strideAlign(pitch * height, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT);
+
+							uint8_t* temp = (uint8_t*)BX_ALLOC(g_allocator, slice);
+							imageDecodeToBgra8(temp
+									, mip.m_data
+									, mip.m_width
+									, mip.m_height
+									, pitch, mip.m_format
+									);
+
+							srd[kk].pData      = temp;
+							srd[kk].RowPitch   = pitch;
+							srd[kk].SlicePitch = slice;
+							totalSize += slice;
+						}
+						else if (compressed)
+						{
+							uint32_t pitch = bx::strideAlign( (mip.m_width /blockInfo.blockWidth )*mip.m_blockSize, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
+							uint32_t slice = bx::strideAlign( (mip.m_height/blockInfo.blockHeight)*pitch,           D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT);
+
+							uint8_t* temp = (uint8_t*)BX_ALLOC(g_allocator, slice);
+							imageCopy(mip.m_height/blockInfo.blockHeight
+									, (mip.m_width /blockInfo.blockWidth )*mip.m_blockSize
+									, mip.m_data
+									, pitch
+									, temp
+									);
+
+							srd[kk].pData      = temp;
+							srd[kk].RowPitch   = pitch;
+							srd[kk].SlicePitch = slice;
+							totalSize += slice;
+						}
+						else
+						{
+							const uint32_t pitch = bx::strideAlign(mip.m_width*mip.m_bpp / 8, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
+							const uint32_t slice = bx::strideAlign(pitch * mip.m_height,      D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT);
+
+							uint8_t* temp = (uint8_t*)BX_ALLOC(g_allocator, slice);
+							imageCopy(mip.m_height
+									, mip.m_width*mip.m_bpp / 8
+									, mip.m_data
+									, pitch
+									, temp
+									);
+
+							srd[kk].pData = temp;
+							srd[kk].RowPitch   = pitch;
+							srd[kk].SlicePitch = slice;
+							totalSize += slice;
+						}
+
+ 						if (swizzle)
+ 						{
+// 							imageSwizzleBgra8(width, height, mip.m_width*4, data, temp);
+ 						}
+
+						srd[kk].SlicePitch = mip.m_height*srd[kk].RowPitch;
+						++kk;
+					}
+					else
+					{
+						const uint32_t pitch = bx::strideAlign(width*bpp / 8, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
+						const uint32_t slice = bx::strideAlign(pitch * height, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT);
+						totalSize += slice;
+					}
+
+					width  >>= 1;
+					height >>= 1;
+					depth  >>= 1;
+				}
+			}
+
+			BX_TRACE("texture total size: %d", totalSize);
+
+			const uint32_t msaaQuality = bx::uint32_satsub( (m_flags&BGFX_TEXTURE_RT_MSAA_MASK)>>BGFX_TEXTURE_RT_MSAA_SHIFT, 1);
+			const DXGI_SAMPLE_DESC& msaa = s_msaa[msaaQuality];
+
+			memset(&m_srvd, 0, sizeof(m_srvd) );
+			m_srvd.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
+			m_srvd.Format = s_textureFormat[m_textureFormat].m_fmtSrv;
+			DXGI_FORMAT format = s_textureFormat[m_textureFormat].m_fmt;
+			if (swizzle)
+			{
+				format        = DXGI_FORMAT_R8G8B8A8_UNORM;
+				m_srvd.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
+			}
+
+			m_uavd.Format = m_srvd.Format;
+
+			ID3D12Device* device = s_renderD3D12->m_device;
+			ID3D12GraphicsCommandList* commandList = s_renderD3D12->m_commandList;
+
+			D3D12_RESOURCE_DESC resourceDesc;
+			resourceDesc.Alignment  = 0;
+			resourceDesc.Width      = textureWidth;
+			resourceDesc.Height     = textureHeight;
+			resourceDesc.MipLevels  = numMips;
+			resourceDesc.Format     = format;
+			resourceDesc.SampleDesc = msaa;
+			resourceDesc.Layout     = D3D12_TEXTURE_LAYOUT_UNKNOWN;
+			resourceDesc.Flags      = D3D12_RESOURCE_FLAG_NONE;
+			resourceDesc.DepthOrArraySize = numSides;
+
+			D3D12_RESOURCE_STATES state = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
+
+			D3D12_CLEAR_VALUE* clearValue = NULL;
+			if (isDepth(TextureFormat::Enum(m_textureFormat) ) )
+			{
+				resourceDesc.Format = s_textureFormat[m_textureFormat].m_fmt;
+				resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
+				state              |= D3D12_RESOURCE_STATE_DEPTH_WRITE;
+				state              &= ~D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
+
+				clearValue = (D3D12_CLEAR_VALUE*)alloca(sizeof(D3D12_CLEAR_VALUE) );
+				clearValue->Format = s_textureFormat[m_textureFormat].m_fmtDsv;
+				clearValue->DepthStencil.Depth   = 1.0f;
+				clearValue->DepthStencil.Stencil = 0;
+			}
+			else if (renderTarget)
+			{
+				clearValue = (D3D12_CLEAR_VALUE*)alloca(sizeof(D3D12_CLEAR_VALUE) );
+				clearValue->Format = resourceDesc.Format;
+				clearValue->Color[0] = 0.0f;
+				clearValue->Color[1] = 0.0f;
+				clearValue->Color[2] = 0.0f;
+				clearValue->Color[3] = 0.0f;
+
+				resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
+			}
+
+			if (bufferOnly)
+			{
+				resourceDesc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE;
+				state              &= ~D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
+			}
+
+			if (computeWrite)
+			{
+				resourceDesc.Flags &= ~D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
+			}
+
+			switch (m_type)
+			{
+			case Texture2D:
+				resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
+				m_srvd.ViewDimension                 = 1 < msaa.Count ? D3D12_SRV_DIMENSION_TEXTURE2DMS : D3D12_SRV_DIMENSION_TEXTURE2D;
+				m_srvd.Texture2D.MostDetailedMip     = 0;
+				m_srvd.Texture2D.MipLevels           = numMips;
+				m_srvd.Texture2D.ResourceMinLODClamp = 0.0f;
+
+				m_uavd.ViewDimension        = D3D12_UAV_DIMENSION_TEXTURE2D;
+				m_uavd.Texture2D.MipSlice   = 0;
+				m_uavd.Texture2D.PlaneSlice = 0;
+				break;
+
+			case Texture3D:
+				resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE3D;
+				m_srvd.ViewDimension                 = D3D12_SRV_DIMENSION_TEXTURE3D;
+				m_srvd.Texture3D.MostDetailedMip     = 0;
+				m_srvd.Texture3D.MipLevels           = numMips;
+				m_srvd.Texture3D.ResourceMinLODClamp = 0.0f;
+
+				m_uavd.ViewDimension         = D3D12_UAV_DIMENSION_TEXTURE3D;
+				m_uavd.Texture3D.MipSlice    = 0;
+				m_uavd.Texture3D.FirstWSlice = 0;
+				m_uavd.Texture3D.WSize       = 0;
+				break;
+
+			case TextureCube:
+				resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
+				m_srvd.ViewDimension                   = D3D12_SRV_DIMENSION_TEXTURECUBE;
+				m_srvd.TextureCube.MostDetailedMip     = 0;
+				m_srvd.TextureCube.MipLevels           = numMips;
+				m_srvd.TextureCube.ResourceMinLODClamp = 0.0f;
+
+				m_uavd.ViewDimension        = D3D12_UAV_DIMENSION_TEXTURE2D;
+				m_uavd.Texture2D.MipSlice   = 0;
+				m_uavd.Texture2D.PlaneSlice = 0;
+				break;
+			}
+
+			m_ptr = createCommittedResource(device, HeapProperty::Default, &resourceDesc, clearValue);
+
+			{
+				uint64_t uploadBufferSize;
+				uint32_t* numRows        = (uint32_t*)alloca(sizeof(uint32_t)*numSrd);
+				uint64_t* rowSizeInBytes = (uint64_t*)alloca(sizeof(uint64_t)*numSrd);
+				D3D12_PLACED_SUBRESOURCE_FOOTPRINT* layouts = (D3D12_PLACED_SUBRESOURCE_FOOTPRINT*)alloca(sizeof(D3D12_PLACED_SUBRESOURCE_FOOTPRINT)*numSrd);
+
+				device->GetCopyableFootprints(&resourceDesc
+					, 0
+					, numSrd
+					, 0
+					, layouts
+					, numRows
+					, rowSizeInBytes
+					, &uploadBufferSize
+					);
+				BX_WARN(uploadBufferSize == totalSize, "uploadBufferSize %d (totalSize %d), numRows %d, rowSizeInBytes %d"
+					, uploadBufferSize
+					, totalSize
+					, numRows[0]
+					, rowSizeInBytes[0]
+					);
+			}
+
+			if (kk != 0)
+			{
+				m_staging = createCommittedResource(device, HeapProperty::Upload, totalSize);
+
+				setState(commandList,D3D12_RESOURCE_STATE_COPY_DEST);
+
+				uint64_t result = UpdateSubresources(commandList
+					, m_ptr
+					, m_staging
+					, 0
+					, 0
+					, numSrd
+					, srd
+					);
+				BX_CHECK(0 != result, "Invalid size");
+				BX_TRACE("Update subresource %" PRId64, result);
+
+				setState(commandList, state);
+			}
+			else
+			{
+				m_staging = NULL;
+
+				setState(commandList, state);
+			}
+
+			if (0 != kk)
+			{
+				kk = 0;
+				for (uint8_t side = 0; side < numSides; ++side)
+				{
+					for (uint32_t lod = 0, num = numMips; lod < num; ++lod)
+					{
+						BX_FREE(g_allocator, const_cast<void*>(srd[kk].pData) );
+						++kk;
+					}
+				}
+			}
+		}
+	}
+
+	// Releases the texture resource and its staging resource.
+	// Does nothing when the texture holds no resource (m_ptr is NULL).
+	void TextureD3D12::destroy()
+	{
+		if (NULL == m_ptr)
+		{
+			return;
+		}
+
+		DX_RELEASE(m_ptr, 0);
+		m_ptr = NULL;
+
+		DX_RELEASE(m_staging, 0);
+		m_staging = NULL;
+	}
+
+	// Uploads a rectangle of texel data into a single subresource of this texture.
+	//
+	// _commandList - command list used for the initial COPY_DEST transition. After the
+	//                pending work is flushed it is replaced by a freshly allocated list,
+	//                which records the copy and the final state transition.
+	// _side, _mip  - combined into the subresource index: _mip + _side*m_numMips.
+	// _rect        - destination rectangle inside the subresource, in texels.
+	// _z, _depth   - front/back extent of the source box.
+	// _pitch       - source row stride in bytes, or UINT16_MAX for tightly packed rows.
+	// _mem         - source texel data.
+	void TextureD3D12::update(ID3D12GraphicsCommandList* _commandList, uint8_t _side, uint8_t _mip, const Rect& _rect, uint16_t _z, uint16_t _depth, uint16_t _pitch, const Memory* _mem)
+	{
+		setState(_commandList, D3D12_RESOURCE_STATE_COPY_DEST);
+
+		const uint32_t subres = _mip + (_side * m_numMips);
+		const uint32_t bpp    = getBitsPerPixel(TextureFormat::Enum(m_textureFormat) );
+		const uint32_t rectpitch = _rect.m_width*bpp/8;
+		const uint32_t srcpitch  = UINT16_MAX == _pitch ? rectpitch : _pitch;
+
+		// Flush pending work before the previous staging resource is released
+		// and replaced, then continue recording on a new command list.
+		s_renderD3D12->m_cmd.finish(s_renderD3D12->m_cmd.kick() );
+		s_renderD3D12->m_commandList = s_renderD3D12->m_cmd.alloc();
+		_commandList = s_renderD3D12->m_commandList;
+
+		DX_RELEASE(m_staging, 0);
+
+		D3D12_RESOURCE_DESC desc = m_ptr->GetDesc();
+
+		// Only _rect.m_height rows are uploaded, so size the footprint accordingly.
+		desc.Height = _rect.m_height;
+
+		uint32_t numRows;
+		uint64_t rowSizeInBytes;
+		uint64_t totalBytes;
+		D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout;
+		s_renderD3D12->m_device->GetCopyableFootprints(&desc
+			, subres
+			, 1
+			, 0
+			, &layout
+			, &numRows
+			, &rowSizeInBytes
+			, &totalBytes
+			);
+		BX_UNUSED(numRows, rowSizeInBytes);
+
+		// The copy below reads the staging buffer using the placed footprint, so rows
+		// must be written with the footprint's (aligned) RowPitch as stride. The
+		// unpadded row size returned in rowSizeInBytes is not the row stride.
+		const uint32_t dstpitch = layout.Footprint.RowPitch;
+
+		m_staging = createCommittedResource(s_renderD3D12->m_device, HeapProperty::Upload, totalBytes);
+		DX_NAME(m_staging, "texture %4d: staging, update", this - s_renderD3D12->m_textures);
+
+		uint8_t* data;
+
+		DX_CHECK(m_staging->Map(0, NULL, (void**)&data) );
+		for (uint32_t ii = 0, height = _rect.m_height; ii < height; ++ii)
+		{
+			// Copy only the texels that belong to the rectangle. Copying a full source
+			// stride could run past the destination row (and past the end of the
+			// staging buffer on the last row) when _pitch is larger than the row.
+			memcpy(&data[ii*dstpitch], &_mem->data[ii*srcpitch], rectpitch);
+		}
+		m_staging->Unmap(0, NULL);
+
+		D3D12_BOX box;
+		box.left   = 0;
+		box.top    = 0;
+		box.right  = box.left + _rect.m_width;
+		box.bottom = box.top  + _rect.m_height;
+		box.front  = _z;
+		box.back   = _z+_depth;
+
+		D3D12_TEXTURE_COPY_LOCATION dst = { m_ptr,     D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, { subres } };
+		D3D12_TEXTURE_COPY_LOCATION src = { m_staging, D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,  layout     };
+		_commandList->CopyTextureRegion(&dst, _rect.m_x, _rect.m_y, 0, &src, &box);
+
+		setState(_commandList, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
+	}
+
+	// Currently a no-op: both arguments are ignored.
+	void TextureD3D12::commit(uint8_t _stage, uint32_t _flags)
+	{
+		BX_UNUSED(_stage, _flags);
+	}
+
+	// Currently a no-op: the body is empty.
+	void TextureD3D12::resolve()
+	{
+	}
+
+	// Transitions the texture resource to _state by recording a resource barrier
+	// on _commandList and remembers the new state. No barrier is recorded when the
+	// texture is already tracked as being in _state.
+	void TextureD3D12::setState(ID3D12GraphicsCommandList* _commandList, D3D12_RESOURCE_STATES _state)
+	{
+		if (m_state == _state)
+		{
+			return;
+		}
+
+		setResourceBarrier(_commandList, m_ptr, m_state, _state);
+		m_state = _state;
+	}
+
+	// Stores the attachment handles of this frame buffer and creates the
+	// corresponding views through postReset().
+	void FrameBufferD3D12::create(uint8_t _num, const TextureHandle* _handles)
+	{
+		const size_t numBytes = _num*sizeof(TextureHandle);
+		memcpy(m_th, _handles, numBytes);
+		m_numTh = _num;
+
+		postReset();
+	}
+
+	// Overload taking a native window handle; currently a no-op, all
+	// parameters are unnamed and ignored.
+	void FrameBufferD3D12::create(uint16_t /*_denseIdx*/, void* /*_nwh*/, uint32_t /*_width*/, uint32_t /*_height*/, TextureFormat::Enum /*_depthFormat*/)
+	{
+	}
+
+	// Currently a no-op: the body is empty.
+	void FrameBufferD3D12::preReset()
+	{
+	}
+
+	// (Re)creates the render-target and depth-stencil views for every valid
+	// attachment of this frame buffer. Color attachments are collected into
+	// m_texture / m_num, a depth attachment is remembered in m_depth.
+	void FrameBufferD3D12::postReset()
+	{
+		// Without attachments there are no views to create.
+		if (0 == m_numTh)
+		{
+			return;
+		}
+
+		ID3D12Device* device = s_renderD3D12->m_device;
+
+		// RTV slots of this frame buffer start after the back-buffer RTVs, with
+		// BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS slots per frame buffer.
+		D3D12_CPU_DESCRIPTOR_HANDLE rtvBase = s_renderD3D12->m_rtvDescriptorHeap->GetCPUDescriptorHandleForHeapStart();
+		const uint32_t rtvStep = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
+		const uint32_t fbhIdx  = (uint32_t)(this - s_renderD3D12->m_frameBuffers);
+		rtvBase.ptr += (BX_COUNTOF(s_renderD3D12->m_backBufferColor) + fbhIdx * BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS) * rtvStep;
+
+		m_depth.idx = bgfx::invalidHandle;
+		m_num = 0;
+
+		for (uint32_t ii = 0; ii < m_numTh; ++ii)
+		{
+			TextureHandle handle = m_th[ii];
+			if (!isValid(handle) )
+			{
+				continue;
+			}
+
+			const TextureD3D12& texture = s_renderD3D12->m_textures[handle.idx];
+
+			if (!isDepth( (TextureFormat::Enum)texture.m_textureFormat) )
+			{
+				// Color attachment: next free RTV slot of this frame buffer.
+				m_texture[m_num] = handle;
+				D3D12_CPU_DESCRIPTOR_HANDLE rtv = { rtvBase.ptr + m_num * rtvStep };
+				device->CreateRenderTargetView(texture.m_ptr, NULL, rtv);
+				m_num++;
+				continue;
+			}
+
+			// Depth attachment: at most one is expected per frame buffer.
+			BX_CHECK(!isValid(m_depth), "");
+			m_depth = handle;
+
+			// DSV slot 0 is skipped; frame buffer N uses slot N+1.
+			D3D12_CPU_DESCRIPTOR_HANDLE dsv = s_renderD3D12->m_dsvDescriptorHeap->GetCPUDescriptorHandleForHeapStart();
+			const uint32_t dsvStep = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_DSV);
+			dsv.ptr += (1 + fbhIdx) * dsvStep;
+
+			const ImageBlockInfo& blockInfo = getBlockInfo(TextureFormat::Enum(texture.m_textureFormat) );
+			BX_UNUSED(blockInfo);
+
+			// Read-only depth/stencil flags are intentionally not set.
+			D3D12_DEPTH_STENCIL_VIEW_DESC dsvDesc;
+			ZeroMemory(&dsvDesc, sizeof(dsvDesc) );
+			dsvDesc.Format        = s_textureFormat[texture.m_textureFormat].m_fmtDsv;
+			dsvDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D;
+			dsvDesc.Flags         = D3D12_DSV_FLAG_NONE;
+
+			device->CreateDepthStencilView(texture.m_ptr, &dsvDesc, dsv);
+		}
+	}
+
+	// Resets the frame buffer to an empty state and returns the dense index
+	// it held before the reset.
+	uint16_t FrameBufferD3D12::destroy()
+	{
+		const uint16_t previousDenseIdx = m_denseIdx;
+
+		m_numTh     = 0;
+		m_depth.idx = bgfx::invalidHandle;
+		m_denseIdx  = UINT16_MAX;
+
+		return previousDenseIdx;
+	}
+
+	// Currently a no-op: the body is empty.
+	void FrameBufferD3D12::resolve()
+	{
+	}
+
+	// Records clear commands for the attachments of this frame buffer.
+	//
+	// Color attachments are cleared when BGFX_CLEAR_COLOR is set: either with a
+	// per-attachment palette entry (BGFX_CLEAR_COLOR_USE_PALETTE, entries with index
+	// UINT8_MAX are skipped) or with a single RGBA color taken from _clear.m_index[0..3].
+	// The depth attachment is cleared when BGFX_CLEAR_DEPTH and/or BGFX_CLEAR_STENCIL is set.
+	// _rect/_num are forwarded unchanged to the D3D12 clear calls.
+	void FrameBufferD3D12::clear(ID3D12GraphicsCommandList* _commandList, const Clear& _clear, const float _palette[][4], const D3D12_RECT* _rect, uint32_t _num)
+	{
+		ID3D12Device* device = s_renderD3D12->m_device;
+		const uint32_t fbhIdx = (uint32_t)(this - s_renderD3D12->m_frameBuffers);
+
+		const bool clearColor = 0 != (BGFX_CLEAR_COLOR & _clear.m_flags) && 0 != m_num;
+		if (clearColor)
+		{
+			// First RTV slot of this frame buffer (same layout as in postReset).
+			D3D12_CPU_DESCRIPTOR_HANDLE rtvBase = s_renderD3D12->m_rtvDescriptorHeap->GetCPUDescriptorHandleForHeapStart();
+			const uint32_t rtvStep = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
+			rtvBase.ptr += (BX_COUNTOF(s_renderD3D12->m_backBufferColor) + fbhIdx * BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS) * rtvStep;
+
+			if (BGFX_CLEAR_COLOR_USE_PALETTE & _clear.m_flags)
+			{
+				for (uint32_t attachment = 0; attachment < m_num; ++attachment)
+				{
+					const uint8_t paletteIdx = _clear.m_index[attachment];
+					if (UINT8_MAX == paletteIdx)
+					{
+						continue;
+					}
+
+					D3D12_CPU_DESCRIPTOR_HANDLE rtv = { rtvBase.ptr + attachment * rtvStep };
+					_commandList->ClearRenderTargetView(rtv, _palette[paletteIdx], _num, _rect);
+				}
+			}
+			else
+			{
+				// m_index holds the packed RGBA8 color in this mode.
+				float frgba[4];
+				frgba[0] = _clear.m_index[0]*1.0f/255.0f;
+				frgba[1] = _clear.m_index[1]*1.0f/255.0f;
+				frgba[2] = _clear.m_index[2]*1.0f/255.0f;
+				frgba[3] = _clear.m_index[3]*1.0f/255.0f;
+
+				for (uint32_t attachment = 0; attachment < m_num; ++attachment)
+				{
+					D3D12_CPU_DESCRIPTOR_HANDLE rtv = { rtvBase.ptr + attachment * rtvStep };
+					_commandList->ClearRenderTargetView(rtv, frgba, _num, _rect);
+				}
+			}
+		}
+
+		if (!isValid(m_depth) )
+		{
+			return;
+		}
+
+		if (0 == ( (BGFX_CLEAR_DEPTH|BGFX_CLEAR_STENCIL) & _clear.m_flags) )
+		{
+			return;
+		}
+
+		// DSV slot of this frame buffer (same layout as in postReset).
+		D3D12_CPU_DESCRIPTOR_HANDLE dsv = s_renderD3D12->m_dsvDescriptorHeap->GetCPUDescriptorHandleForHeapStart();
+		const uint32_t dsvStep = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_DSV);
+		dsv.ptr += (1 + fbhIdx) * dsvStep;
+
+		DWORD clearFlags = 0;
+		if (_clear.m_flags & BGFX_CLEAR_DEPTH)
+		{
+			clearFlags |= D3D12_CLEAR_FLAG_DEPTH;
+		}
+		if (_clear.m_flags & BGFX_CLEAR_STENCIL)
+		{
+			clearFlags |= D3D12_CLEAR_FLAG_STENCIL;
+		}
+
+		_commandList->ClearDepthStencilView(dsv
+			, D3D12_CLEAR_FLAGS(clearFlags)
+			, _clear.m_depth
+			, _clear.m_stencil
+			, _num
+			, _rect
+			);
+	}
+
+	// Records the commands for one frame on m_commandList.
+	//
+	// Steps, in order:
+	//  1. apply the requested resolution and upload the frame's transient index/vertex data,
+	//  2. sort the render items and walk them, switching view state when the view changes
+	//     and recording a compute dispatch or a draw call per item,
+	//  3. optionally blit the debug statistics or debug text overlay,
+	//  4. transition the back buffer back to PRESENT and kick the command list.
+	//
+	// _render               - frame data: sort keys, render items, per-view state, debug flags.
+	// _clearQuad            - forwarded to clearQuad() when a view requests a clear.
+	// _textVideoMemBlitter  - forwarded to blit() for the debug text overlays.
+	void RendererContextD3D12::submit(Frame* _render, ClearQuad& _clearQuad, TextVideoMemBlitter& _textVideoMemBlitter)
+	{
+//		PIX_BEGINEVENT(D3DCOLOR_RGBA(0xff, 0x00, 0x00, 0xff), L"rendererSubmit");
+
+		updateResolution(_render->m_resolution);
+
+		// elapsed accumulates the CPU time spent in this function: it starts at
+		// -counter and has the counter added again after the item loop.
+		int64_t elapsed = -bx::getHPCounter();
+		// Never updated below, so the "Capture" stat always reports 0.
+		int64_t captureElapsed = 0;
+
+		// Upload transient index/vertex data written for this frame, if any.
+		if (0 < _render->m_iboffset)
+		{
+			TransientIndexBuffer* ib = _render->m_transientIb;
+			m_indexBuffers[ib->handle.idx].update(m_commandList, 0, _render->m_iboffset, ib->data);
+		}
+
+		if (0 < _render->m_vboffset)
+		{
+			TransientVertexBuffer* vb = _render->m_transientVb;
+			m_vertexBuffers[vb->handle.idx].update(m_commandList, 0, _render->m_vboffset, vb->data);
+		}
+
+		_render->sort();
+
+		RenderDraw currentState;
+		currentState.clear();
+		currentState.m_flags = BGFX_STATE_NONE;
+		currentState.m_stencil = packStencil(BGFX_STENCIL_NONE, BGFX_STENCIL_NONE);
+
+		_render->m_hmdInitialized = false;
+
+		// HMD rendering is hard-wired off in this function.
+		const bool hmdEnabled = false;
+		ViewState viewState(_render, hmdEnabled);
+		viewState.reset(_render, hmdEnabled);
+
+// 		bool wireframe = !!(_render->m_debug&BGFX_DEBUG_WIREFRAME);
+// 		bool scissorEnabled = false;
+// 		setDebugWireframe(wireframe);
+
+		// Cached state used to skip redundant program / pipeline-state changes.
+		uint16_t programIdx = invalidHandle;
+		ID3D12PipelineState* currentPso = NULL;
+		SortKey key;
+		uint8_t view = 0xff;
+		FrameBufferHandle fbh = BGFX_INVALID_HANDLE;
+		float alphaRef = 0.0f;
+
+// 		const uint64_t pt = _render->m_debug&BGFX_DEBUG_WIREFRAME ? BGFX_STATE_PT_LINES : 0;
+// 		uint8_t primIndex = uint8_t(pt >> BGFX_STATE_PT_SHIFT);
+// 		PrimInfo prim = s_primInfo[primIndex];
+// 		deviceCtx->IASetPrimitiveTopology(prim.m_type);
+
+		bool wasCompute = false;
+		bool viewHasScissor = false;
+		Rect viewScissorRect;
+		viewScissorRect.clear();
+
+		BX_UNUSED(alphaRef);
+
+		// Per-primitive-type statistics, indexed like s_primInfo.
+		uint32_t statsNumPrimsSubmitted[BX_COUNTOF(s_primInfo)] = {};
+		uint32_t statsNumPrimsRendered[BX_COUNTOF(s_primInfo)] = {};
+		uint32_t statsNumInstances[BX_COUNTOF(s_primInfo)] = {};
+		uint32_t statsNumIndices = 0;
+		// [0] counts draw items, [1] counts compute items.
+		uint32_t statsKeyType[2] = {};
+
+		// Select the back buffer (and the scratch buffer with the same index) for this frame.
+		m_backBufferColorIdx = m_frame % m_scd.BufferCount;
+		m_frame++;
+
+		// Blend functions that reference the blend factor; used per draw to
+		// decide whether OMSetBlendFactor has to be called.
+		const uint64_t f0 = BGFX_STATE_BLEND_FUNC(BGFX_STATE_BLEND_FACTOR, BGFX_STATE_BLEND_FACTOR);
+		const uint64_t f1 = BGFX_STATE_BLEND_FUNC(BGFX_STATE_BLEND_INV_FACTOR, BGFX_STATE_BLEND_INV_FACTOR);
+
+		D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle;
+		m_scratchBuffer[m_backBufferColorIdx].reset(gpuHandle);
+
+		// Back buffer: PRESENT -> RENDER_TARGET for the duration of the frame.
+		setResourceBarrier(m_commandList
+			, m_backBufferColor[m_backBufferColorIdx]
+			, D3D12_RESOURCE_STATE_PRESENT
+			, D3D12_RESOURCE_STATE_RENDER_TARGET
+			);
+
+		// No render items are processed when BGFX_DEBUG_IFH is set.
+		if (0 == (_render->m_debug&BGFX_DEBUG_IFH) )
+		{
+// 			uint8_t eye = 0;
+// 			uint8_t restartState = 0;
+			viewState.m_rect = _render->m_rect[0];
+
+			int32_t numItems = _render->m_num;
+			// restartItem is never modified in this loop, so the loop runs while item < numItems.
+			for (int32_t item = 0, restartItem = numItems; item < numItems || restartItem < numItems;)
+			{
+				const bool isCompute = key.decode(_render->m_sortKeys[item], _render->m_viewRemap);
+				statsKeyType[isCompute]++;
+
+				const bool viewChanged = 0
+					|| key.m_view != view
+					|| item == numItems
+					;
+
+				const RenderItem& renderItem = _render->m_renderItem[_render->m_sortValues[item] ];
+				++item;
+
+				if (viewChanged)
+				{
+					// View switch: kick the command list, then rebind root signature and
+					// descriptor heaps and set up frame buffer, viewport, scissor and clear.
+					// NOTE(review): kick() is defined elsewhere; presumably it submits the
+					// recorded commands and continues on a fresh list - confirm.
+					kick();
+
+					if (isCompute)
+					{
+						m_commandList->SetComputeRootSignature(m_rootSignature);
+					}
+					else
+					{
+						m_commandList->SetGraphicsRootSignature(m_rootSignature);
+					}
+
+					ID3D12DescriptorHeap* heaps[] = {
+						m_samplerAllocator.getHeap(),
+						m_scratchBuffer[m_backBufferColorIdx].getHeap(),
+					};
+					m_commandList->SetDescriptorHeaps(BX_COUNTOF(heaps), heaps);
+
+					// Invalidate cached program / PSO so the first item of the view rebinds them.
+					view = key.m_view;
+					programIdx = invalidHandle;
+					currentPso = NULL;
+
+					fbh = _render->m_fb[view];
+					setFrameBuffer(fbh);
+
+					viewState.m_rect = _render->m_rect[view];
+					const Rect& rect        = _render->m_rect[view];
+					const Rect& scissorRect = _render->m_scissor[view];
+					// A zero scissor rect means "no scissor": the view rect is used instead.
+					viewHasScissor  = !scissorRect.isZero();
+					viewScissorRect = viewHasScissor ? scissorRect : rect;
+
+					D3D12_VIEWPORT vp;
+					vp.TopLeftX = rect.m_x;
+					vp.TopLeftY = rect.m_y;
+					vp.Width    = rect.m_width;
+					vp.Height   = rect.m_height;
+					vp.MinDepth = 0.0f;
+					vp.MaxDepth = 1.0f;
+					m_commandList->RSSetViewports(1, &vp);
+
+					D3D12_RECT rc;
+					rc.left   = viewScissorRect.m_x;
+					rc.top    = viewScissorRect.m_y;
+					rc.right  = viewScissorRect.m_x + viewScissorRect.m_width;
+					rc.bottom = viewScissorRect.m_y + viewScissorRect.m_height;
+					m_commandList->RSSetScissorRects(1, &rc);
+
+					Clear& clear = _render->m_clear[view];
+					if (BGFX_CLEAR_NONE != clear.m_flags)
+					{
+						// Clear only the part of the view rect that lies inside the scissor.
+						Rect clearRect = rect;
+						clearRect.intersect(rect, viewScissorRect);
+						clearQuad(_clearQuad, clearRect, clear, _render->m_clearColor);
+					}
+				}
+
+				if (isCompute)
+				{
+					// ---- Compute item ----
+					if (!wasCompute)
+					{
+						wasCompute = true;
+					}
+					const RenderCompute& compute = renderItem.compute;
+
+					bool programChanged = false;
+					bool constantsChanged = compute.m_constBegin < compute.m_constEnd;
+					rendererUpdateUniforms(this, _render->m_constantBuffer, compute.m_constBegin, compute.m_constEnd);
+
+					if (key.m_program != programIdx)
+					{
+						programIdx = key.m_program;
+
+						ProgramD3D12& program = m_program[key.m_program];
+						m_currentProgram = &program;
+
+						// A program switch always forces the constants to be re-committed.
+						programChanged =
+							constantsChanged = true;
+					}
+
+					if (invalidHandle != programIdx)
+					{
+						ProgramD3D12& program = m_program[programIdx];
+
+						if (constantsChanged)
+						{
+							// For compute programs only m_vsh's constant buffer is committed.
+							ConstantBuffer* vcb = program.m_vsh->m_constantBuffer;
+							if (NULL != vcb)
+							{
+								commit(*vcb);
+							}
+						}
+
+						viewState.setPredefined<4>(this, view, 0, program, _render, compute);
+
+						if (constantsChanged
+						||  program.m_numPredefined > 0)
+						{
+							commitShaderConstants(gpuHandle);
+						}
+					}
+
+					if (programChanged)
+					{
+						ID3D12PipelineState* pso = getPipelineState(programIdx);
+						m_commandList->SetPipelineState(pso);
+					}
+
+					D3D12_GPU_DESCRIPTOR_HANDLE srvHandle[BGFX_MAX_COMPUTE_BINDINGS] = {};
+					// Stays zero-initialized: no sampler flags are gathered for compute bindings.
+					uint32_t samplerFlags[BGFX_MAX_COMPUTE_BINDINGS] = {};
+
+					// Allocate a descriptor for every bound image/buffer: a UAV for anything
+					// that is not read-only, otherwise the regular (alloc) view.
+					for (uint32_t ii = 0; ii < BGFX_MAX_COMPUTE_BINDINGS; ++ii)
+					{
+						const Binding& bind = compute.m_bind[ii];
+						if (invalidHandle != bind.m_idx)
+						{
+							switch (bind.m_type)
+							{
+							case Binding::Image:
+								{
+									TextureD3D12& texture = m_textures[bind.m_idx];
+
+									if (Access::Read != bind.m_un.m_compute.m_access)
+									{
+										m_scratchBuffer[m_backBufferColorIdx].allocUav(srvHandle[ii], texture);
+									}
+									else
+									{
+										m_scratchBuffer[m_backBufferColorIdx].alloc(srvHandle[ii], texture);
+									}
+								}
+								break;
+
+							case Binding::IndexBuffer:
+							case Binding::VertexBuffer:
+								{
+									BufferD3D12& buffer = Binding::IndexBuffer == bind.m_type
+										? m_indexBuffers[bind.m_idx]
+										: m_vertexBuffers[bind.m_idx]
+										;
+
+									if (Access::Read != bind.m_un.m_compute.m_access)
+									{
+										m_scratchBuffer[m_backBufferColorIdx].allocUav(srvHandle[ii], buffer);
+									}
+									else
+									{
+										m_scratchBuffer[m_backBufferColorIdx].alloc(srvHandle[ii], buffer);
+									}
+								}
+								break;
+							}
+						}
+					}
+
+					// Both the SRV and the UAV table are bound to the first allocated handle.
+					uint16_t samplerStateIdx = getSamplerState(samplerFlags, BGFX_MAX_COMPUTE_BINDINGS);
+					m_commandList->SetComputeRootDescriptorTable(Rdt::Sampler, m_samplerAllocator.get(samplerStateIdx));
+					m_commandList->SetComputeRootDescriptorTable(Rdt::SRV, srvHandle[0]);
+					m_commandList->SetComputeRootDescriptorTable(Rdt::CBV, gpuHandle);
+					m_commandList->SetComputeRootDescriptorTable(Rdt::UAV, srvHandle[0]);
+
+					if (isValid(compute.m_indirectBuffer) )
+					{
+						const VertexBufferD3D12& vb = m_vertexBuffers[compute.m_indirectBuffer.idx];
+
+						uint32_t numDrawIndirect = UINT16_MAX == compute.m_numIndirect
+							? vb.m_size/BGFX_CONFIG_DRAW_INDIRECT_STRIDE
+							: compute.m_numIndirect
+							;
+
+						// NOTE: indirect dispatch is not implemented here - the loop only
+						// advances the argument offset and records no command.
+						uint32_t args = compute.m_startIndirect * BGFX_CONFIG_DRAW_INDIRECT_STRIDE;
+						for (uint32_t ii = 0; ii < numDrawIndirect; ++ii)
+						{
+//							deviceCtx->DispatchIndirect(ptr, args);
+							args += BGFX_CONFIG_DRAW_INDIRECT_STRIDE;
+						}
+					}
+					else
+					{
+						m_commandList->Dispatch(compute.m_numX, compute.m_numY, compute.m_numZ);
+					}
+
+					// Compute items never fall through to the draw path below.
+					continue;
+				}
+
+//				bool resetState = viewChanged || wasCompute;
+
+				if (wasCompute)
+				{
+					// First draw after compute work: drop the cached program and rebind
+					// the graphics root signature and descriptor heaps.
+					if (BX_ENABLED(BGFX_CONFIG_DEBUG_PIX) )
+					{
+// 						wchar_t* viewNameW = s_viewNameW[view];
+// 						viewNameW[3] = L' ';
+// 						PIX_ENDEVENT();
+// 						PIX_BEGINEVENT(D3DCOLOR_RGBA(0xff, 0x00, 0x00, 0xff), viewNameW);
+					}
+
+					wasCompute = false;
+
+					programIdx = invalidHandle;
+					m_currentProgram = NULL;
+
+					m_commandList->SetGraphicsRootSignature(m_rootSignature);
+					ID3D12DescriptorHeap* heaps[] = {
+						m_samplerAllocator.getHeap(),
+						m_scratchBuffer[m_backBufferColorIdx].getHeap(),
+					};
+					m_commandList->SetDescriptorHeaps(BX_COUNTOF(heaps), heaps);
+
+//					invalidateCompute();
+				}
+
+				// ---- Draw item ----
+				const RenderDraw& draw = renderItem.draw;
+
+				bool constantsChanged = draw.m_constBegin < draw.m_constEnd;
+				rendererUpdateUniforms(this, _render->m_constantBuffer, draw.m_constBegin, draw.m_constEnd);
+
+				// Items without a vertex buffer only update uniforms; nothing is drawn.
+				if (isValid(draw.m_vertexBuffer) )
+				{
+					bool programChanged = false;
+
+					if (key.m_program != programIdx)
+					{
+						programIdx = key.m_program;
+
+						if (invalidHandle == programIdx)
+						{
+							m_currentProgram = NULL;
+						}
+						else
+						{
+							ProgramD3D12& program = m_program[programIdx];
+							m_currentProgram = &program;
+						}
+
+						// A program switch always forces the constants to be re-committed.
+						programChanged =
+							constantsChanged = true;
+					}
+
+					if (invalidHandle != programIdx)
+					{
+						ProgramD3D12& program = m_program[programIdx];
+
+						if (constantsChanged)
+						{
+							ConstantBuffer* vcb = program.m_vsh->m_constantBuffer;
+							if (NULL != vcb)
+							{
+								commit(*vcb);
+							}
+
+							ConstantBuffer* fcb = program.m_fsh->m_constantBuffer;
+							if (NULL != fcb)
+							{
+								commit(*fcb);
+							}
+						}
+
+						viewState.setPredefined<4>(this, view, 0, program, _render, draw);
+
+						if (constantsChanged
+						||  program.m_numPredefined > 0)
+						{
+							commitShaderConstants(gpuHandle);
+						}
+					}
+
+					// The vertex declaration comes from the buffer when it has one,
+					// otherwise from the draw call.
+					const VertexBufferD3D12& vb = m_vertexBuffers[draw.m_vertexBuffer.idx];
+					uint16_t declIdx = !isValid(vb.m_decl) ? draw.m_vertexDecl.idx : vb.m_decl.idx;
+					const VertexDecl& vertexDecl = m_vertexDecls[declIdx];
+
+					// Look up the pipeline state for this draw and bind it only if it changed.
+					const uint64_t state = draw.m_flags;
+					ID3D12PipelineState* pso = 
+						getPipelineState(state
+							, draw.m_stencil
+							, declIdx
+							, programIdx
+							, draw.m_instanceDataStride/16
+							);
+					if (pso != currentPso)
+					{
+						currentPso = pso;
+						m_commandList->SetPipelineState(pso);
+					}
+
+					const uint32_t fstencil = unpackStencil(0, draw.m_stencil);
+					const uint32_t ref = (fstencil&BGFX_STENCIL_FUNC_REF_MASK)>>BGFX_STENCIL_FUNC_REF_SHIFT;
+					m_commandList->OMSetStencilRef(ref);
+
+					bool hasFactor = 0
+						|| f0 == (state & f0)
+						|| f1 == (state & f1)
+						;
+
+					if (hasFactor)
+					{
+						// Unpack draw.m_rgba (R in the top byte, A in the bottom byte) to floats.
+						float blendFactor[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
+						blendFactor[0] = ( (draw.m_rgba>>24)     )/255.0f;
+						blendFactor[1] = ( (draw.m_rgba>>16)&0xff)/255.0f;
+						blendFactor[2] = ( (draw.m_rgba>> 8)&0xff)/255.0f;
+						blendFactor[3] = ( (draw.m_rgba    )&0xff)/255.0f;
+						m_commandList->OMSetBlendFactor(blendFactor);
+					}
+
+					// Allocate one descriptor per bound texture stage and gather sampler
+					// flags. Unbound stages reuse the handle of stage 0 and get flags 0.
+					D3D12_GPU_DESCRIPTOR_HANDLE srvHandle[BGFX_CONFIG_MAX_TEXTURE_SAMPLERS];
+					uint32_t samplerFlags[BGFX_CONFIG_MAX_TEXTURE_SAMPLERS];
+					{
+						srvHandle[0].ptr = 0;
+						for (uint32_t stage = 0; stage < BGFX_CONFIG_MAX_TEXTURE_SAMPLERS; ++stage)
+						{
+							const Binding& sampler = draw.m_bind[stage];
+							if (invalidHandle != sampler.m_idx)
+							{
+								TextureD3D12& texture = m_textures[sampler.m_idx];
+								m_scratchBuffer[m_backBufferColorIdx].alloc(srvHandle[stage], texture);
+								// Use the binding's own flags unless it asks for the texture defaults.
+								samplerFlags[stage] = (0 == (BGFX_SAMPLER_DEFAULT_FLAGS & sampler.m_un.m_draw.m_flags)
+									? sampler.m_un.m_draw.m_flags
+									: texture.m_flags
+									) & BGFX_TEXTURE_SAMPLER_BITS_MASK
+									;
+							}
+							else
+							{
+								memcpy(&srvHandle[stage], &srvHandle[0], sizeof(D3D12_GPU_DESCRIPTOR_HANDLE) );
+								samplerFlags[stage] = 0;
+							}
+						}
+					}
+
+					uint16_t samplerStateIdx = getSamplerState(samplerFlags);
+					m_commandList->SetGraphicsRootDescriptorTable(Rdt::Sampler, m_samplerAllocator.get(samplerStateIdx) );
+					// The SRV table is only bound when stage 0 has a texture.
+					if (srvHandle[0].ptr != 0)
+					{
+						m_commandList->SetGraphicsRootDescriptorTable(Rdt::SRV, srvHandle[0]);
+					}
+					m_commandList->SetGraphicsRootDescriptorTable(Rdt::CBV, gpuHandle);
+
+					// UINT32_MAX means "all vertices in the buffer".
+					uint32_t numVertices = draw.m_numVertices;
+					if (UINT32_MAX == numVertices)
+					{
+						numVertices = vb.m_size / vertexDecl.m_stride;
+					}
+
+					// Slot 0: vertex data; slot 1 (optional): per-instance data.
+					D3D12_VERTEX_BUFFER_VIEW vbView[2];
+					uint32_t numVertexBuffers = 1;
+					vbView[0].BufferLocation = vb.m_ptr->GetGPUVirtualAddress();
+					vbView[0].StrideInBytes  = vertexDecl.m_stride;
+					vbView[0].SizeInBytes    = vb.m_size;
+
+					if (isValid(draw.m_instanceDataBuffer) )
+					{
+						const VertexBufferD3D12& inst = m_vertexBuffers[draw.m_instanceDataBuffer.idx];
+						vbView[1].BufferLocation = inst.m_ptr->GetGPUVirtualAddress() + draw.m_instanceDataOffset;
+						vbView[1].StrideInBytes  = draw.m_instanceDataStride;
+						vbView[1].SizeInBytes    = draw.m_numInstances * draw.m_instanceDataStride;
+						++numVertexBuffers;
+					}
+
+					m_commandList->IASetVertexBuffers(0, numVertexBuffers, vbView);
+
+					uint32_t numIndices        = 0;
+					uint32_t numPrimsSubmitted = 0;
+					uint32_t numInstances      = 0;
+					uint32_t numPrimsRendered  = 0;
+
+					// Primitive type is encoded in the draw state flags and indexes s_primInfo.
+					const uint64_t pt = draw.m_flags&BGFX_STATE_PT_MASK;
+					uint8_t primIdx = uint8_t(pt >> BGFX_STATE_PT_SHIFT);
+					PrimInfo prim = s_primInfo[primIdx];
+					m_commandList->IASetPrimitiveTopology(prim.m_toplogy);
+
+					if (isValid(draw.m_indexBuffer) )
+					{
+						const BufferD3D12& ib = m_indexBuffers[draw.m_indexBuffer.idx];
+						const bool hasIndex16 = 0 == (ib.m_flags & BGFX_BUFFER_INDEX32);
+
+						D3D12_INDEX_BUFFER_VIEW ibv;
+						ibv.Format = hasIndex16
+							? DXGI_FORMAT_R16_UINT
+							: DXGI_FORMAT_R32_UINT
+							;
+						ibv.BufferLocation = ib.m_ptr->GetGPUVirtualAddress();
+						ibv.SizeInBytes    = ib.m_size;
+						m_commandList->IASetIndexBuffer(&ibv);
+
+						if (UINT32_MAX == draw.m_numIndices)
+						{
+							// UINT32_MAX means "all indices in the buffer".
+							const uint32_t indexSize = hasIndex16 ? 2 : 4;
+							numIndices        = ib.m_size / indexSize;
+							numPrimsSubmitted = numIndices / prim.m_div - prim.m_sub;
+							numInstances      = draw.m_numInstances;
+							numPrimsRendered  = numPrimsSubmitted*draw.m_numInstances;
+
+							m_commandList->DrawIndexedInstanced(numIndices
+								, draw.m_numInstances
+								, draw.m_startIndex
+								, draw.m_startVertex
+								, 0
+								);
+						}
+						else if (prim.m_min <= draw.m_numIndices)
+						{
+							// Draws with fewer indices than one primitive needs are skipped.
+							numIndices        = draw.m_numIndices;
+							numPrimsSubmitted = numIndices / prim.m_div - prim.m_sub;
+							numInstances      = draw.m_numInstances;
+							numPrimsRendered  = numPrimsSubmitted*draw.m_numInstances;
+
+							m_commandList->DrawIndexedInstanced(numIndices
+								, draw.m_numInstances
+								, draw.m_startIndex
+								, draw.m_startVertex
+								, 0
+								);
+						}
+					}
+					else
+					{
+						// Non-indexed draw.
+						numPrimsSubmitted = numVertices / prim.m_div - prim.m_sub;
+						numInstances      = draw.m_numInstances;
+						numPrimsRendered  = numPrimsSubmitted*draw.m_numInstances;
+
+						m_commandList->DrawInstanced(numVertices
+							, draw.m_numInstances
+							, draw.m_startVertex
+							, 0
+							);
+					}
+
+					statsNumPrimsSubmitted[primIdx] += numPrimsSubmitted;
+					statsNumPrimsRendered[primIdx]  += numPrimsRendered;
+					statsNumInstances[primIdx]      += numInstances;
+					statsNumIndices                 += numIndices;
+				}
+			}
+		}
+
+		// Frame timing. The function-local statics keep their values between calls:
+		// 'last' is the timestamp of the previous call, 'min'/'max' track the frame-time
+		// range and are reset each time the stats text is refreshed.
+		int64_t now = bx::getHPCounter();
+		elapsed += now;
+
+		static int64_t last = now;
+		int64_t frameTime = now - last;
+		last = now;
+
+		static int64_t min = frameTime;
+		static int64_t max = frameTime;
+		min = min > frameTime ? frameTime : min;
+		max = max < frameTime ? frameTime : max;
+
+		if (_render->m_debug & (BGFX_DEBUG_IFH | BGFX_DEBUG_STATS) )
+		{
+//			PIX_BEGINEVENT(D3DCOLOR_RGBA(0x40, 0x40, 0x40, 0xff), L"debugstats");
+
+			TextVideoMem& tvm = m_textVideoMem;
+
+			static int64_t next = now;
+
+			// The stats text is rebuilt once per getHPFrequency() ticks; in between
+			// the previously formatted text is blitted again.
+			if (now >= next)
+			{
+				next = now + bx::getHPFrequency();
+				double freq = double(bx::getHPFrequency() );
+				double toMs = 1000.0 / freq;
+
+				tvm.clear();
+				uint16_t pos = 0;
+				tvm.printf(0, pos++, BGFX_CONFIG_DEBUG ? 0x89 : 0x8f
+					, " %s / " BX_COMPILER_NAME " / " BX_CPU_NAME " / " BX_ARCH_NAME " / " BX_PLATFORM_NAME " "
+					, getRendererName()
+					);
+
+				// Adapter description is stored as wide characters; convert for printing.
+				const DXGI_ADAPTER_DESC& desc = m_adapterDesc;
+				char description[BX_COUNTOF(desc.Description)];
+				wcstombs(description, desc.Description, BX_COUNTOF(desc.Description) );
+				tvm.printf(0, pos++, 0x0f, " Device: %s", description);
+
+				char dedicatedVideo[16];
+				bx::prettify(dedicatedVideo, BX_COUNTOF(dedicatedVideo), desc.DedicatedVideoMemory);
+
+				char dedicatedSystem[16];
+				bx::prettify(dedicatedSystem, BX_COUNTOF(dedicatedSystem), desc.DedicatedSystemMemory);
+
+				char sharedSystem[16];
+				bx::prettify(sharedSystem, BX_COUNTOF(sharedSystem), desc.SharedSystemMemory);
+
+				tvm.printf(0, pos++, 0x0f, " Memory: %s (video), %s (system), %s (shared)"
+					, dedicatedVideo
+					, dedicatedSystem
+					, sharedSystem
+					);
+
+				pos = 10;
+				tvm.printf(10, pos++, 0x8e, "       Frame: %7.3f, % 7.3f \x1f, % 7.3f \x1e [ms] / % 6.2f FPS "
+					, double(frameTime)*toMs
+					, double(min)*toMs
+					, double(max)*toMs
+					, freq/frameTime
+					);
+
+				// NOTE: 'hmd' is formatted but not used; the line below prints the
+				// literal ", no-HMD " instead.
+				char hmd[16];
+				bx::snprintf(hmd, BX_COUNTOF(hmd), ", [%c] HMD ", hmdEnabled ? '\xfe' : ' ');
+
+				const uint32_t msaa = (m_resolution.m_flags&BGFX_RESET_MSAA_MASK)>>BGFX_RESET_MSAA_SHIFT;
+				tvm.printf(10, pos++, 0x8e, " Reset flags: [%c] vsync, [%c] MSAAx%d%s, [%c] MaxAnisotropy "
+					, !!(m_resolution.m_flags&BGFX_RESET_VSYNC) ? '\xfe' : ' '
+					, 0 != msaa ? '\xfe' : ' '
+					, 1<<msaa
+					, ", no-HMD "
+					, !!(m_resolution.m_flags&BGFX_RESET_MAXANISOTROPY) ? '\xfe' : ' '
+					);
+
+				double elapsedCpuMs = double(elapsed)*toMs;
+				tvm.printf(10, pos++, 0x8e, "   Submitted: %4d (draw %4d, compute %4d) / CPU %3.4f [ms]"
+					, _render->m_num
+					, statsKeyType[0]
+					, statsKeyType[1]
+					, elapsedCpuMs
+					);
+
+				for (uint32_t ii = 0; ii < BX_COUNTOF(s_primName); ++ii)
+				{
+					tvm.printf(10, pos++, 0x8e, "   %9s: %7d (#inst: %5d), submitted: %7d"
+						, s_primName[ii]
+						, statsNumPrimsRendered[ii]
+						, statsNumInstances[ii]
+						, statsNumPrimsSubmitted[ii]
+						);
+				}
+
+// 				if (NULL != m_renderdocdll)
+// 				{
+// 					tvm.printf(tvm.m_width-27, 0, 0x1f, " [F11 - RenderDoc capture] ");
+// 				}
+
+				tvm.printf(10, pos++, 0x8e, "     Indices: %7d", statsNumIndices);
+				tvm.printf(10, pos++, 0x8e, "    DVB size: %7d", _render->m_vboffset);
+				tvm.printf(10, pos++, 0x8e, "    DIB size: %7d", _render->m_iboffset);
+
+				pos++;
+				tvm.printf(10, pos++, 0x8e, " State cache:                                ");
+				tvm.printf(10, pos++, 0x8e, " PSO    | Sampler | Queued                   ");
+				tvm.printf(10, pos++, 0x8e, " %6d |  %6d | %6d"
+					, m_pipelineStateCache.getCount()
+					, m_samplerStateCache.getCount()
+					, m_cmd.m_control.available()
+					);
+				pos++;
+
+				double captureMs = double(captureElapsed)*toMs;
+				tvm.printf(10, pos++, 0x8e, "     Capture: %3.4f [ms]", captureMs);
+
+				// The two wait lines get different attributes; which one gets which
+				// depends on whether the submit wait is shorter than the render wait.
+				uint8_t attr[2] = { 0x89, 0x8a };
+				uint8_t attrIndex = _render->m_waitSubmit < _render->m_waitRender;
+
+				tvm.printf(10, pos++, attr[attrIndex&1], " Submit wait: %3.4f [ms]", _render->m_waitSubmit*toMs);
+				tvm.printf(10, pos++, attr[(attrIndex+1)&1], " Render wait: %3.4f [ms]", _render->m_waitRender*toMs);
+
+				// Restart min/max tracking for the next reporting interval.
+				min = frameTime;
+				max = frameTime;
+			}
+
+			blit(this, _textVideoMemBlitter, tvm);
+
+//			PIX_ENDEVENT();
+		}
+		else if (_render->m_debug & BGFX_DEBUG_TEXT)
+		{
+//			PIX_BEGINEVENT(D3DCOLOR_RGBA(0x40, 0x40, 0x40, 0xff), L"debugtext");
+
+			blit(this, _textVideoMemBlitter, _render->m_textVideoMem);
+
+//			PIX_ENDEVENT();
+		}
+
+		// Back buffer: RENDER_TARGET -> PRESENT, then kick the command list and
+		// remember the value kick() returns for this back buffer.
+		setResourceBarrier(m_commandList
+			, m_backBufferColor[m_backBufferColorIdx]
+			, D3D12_RESOURCE_STATE_RENDER_TARGET
+			, D3D12_RESOURCE_STATE_PRESENT
+			);
+		m_backBufferColorFence[m_backBufferColorIdx] = kick();
+	}
+} /* namespace d3d12 */ } // namespace bgfx
+
 #else
 
-namespace bgfx { namespace d3d12
+namespace bgfx
 {
 	RendererContextI* rendererCreate()
 	{

+ 430 - 0
src/renderer_d3d12.h

@@ -0,0 +1,430 @@
+/*
+ * Copyright 2011-2015 Branimir Karadzic. All rights reserved.
+ * License: http://www.opensource.org/licenses/BSD-2-Clause
+ */
+
+#ifndef BGFX_RENDERER_D3D12_H_HEADER_GUARD
+#define BGFX_RENDERER_D3D12_H_HEADER_GUARD
+
+#define USE_D3D12_DYNAMIC_LIB 1
+
+#include <d3d12.h>
+#include <d3dx12.h>
+#include <dxgidebug.h>
+
+#include "renderer.h"
+#include "renderer_d3d.h"
+#include "shader_dxbc.h"
+
+namespace bgfx { namespace d3d12
+{
+	// Enumerates four descriptor kinds (sampler, SRV, CBV, UAV); `Count` is the
+	// number of kinds.
+	// NOTE(review): "Rdt" presumably abbreviates "root descriptor table" and the
+	// values index root-signature slots -- confirm against renderer_d3d12.cpp.
+	struct Rdt
+	{
+		enum Enum
+		{
+			Sampler,
+			SRV,
+			CBV,
+			UAV,
+
+			Count
+		};
+	};
+
+	// Pairs a descriptor heap with an upload resource and a byte cursor
+	// (m_data / m_size / m_pos). Only the declaration lives in this header;
+	// the member functions are defined elsewhere.
+	// NOTE(review): presumably a per-frame transient allocator -- confirm
+	// against the definitions in renderer_d3d12.cpp.
+	class ScratchBufferD3D12
+	{
+	public:
+		// The constructor does not initialize any member; create() is expected
+		// to be called before use.
+		ScratchBufferD3D12()
+		{
+		}
+
+		// The destructor releases nothing; see destroy().
+		~ScratchBufferD3D12()
+		{
+		}
+
+		void create(uint32_t _size, uint32_t _maxDescriptors);
+		void destroy();
+		void reset(D3D12_GPU_DESCRIPTOR_HANDLE& gpuHandle);
+		void* alloc(D3D12_GPU_DESCRIPTOR_HANDLE& gpuHandle, uint32_t _size);
+		void  alloc(D3D12_GPU_DESCRIPTOR_HANDLE& gpuHandle, struct TextureD3D12& _texture);
+		void  allocUav(D3D12_GPU_DESCRIPTOR_HANDLE& gpuHandle, struct TextureD3D12& _texture);
+
+		void  alloc(D3D12_GPU_DESCRIPTOR_HANDLE& gpuHandle, struct BufferD3D12& _buffer);
+		void  allocUav(D3D12_GPU_DESCRIPTOR_HANDLE& gpuHandle, struct BufferD3D12& _buffer);
+
+		// Returns the descriptor heap pointer held by this object.
+		ID3D12DescriptorHeap* getHeap()
+		{
+			return m_heap;
+		}
+
+	private:
+		ID3D12DescriptorHeap* m_heap;
+		ID3D12Resource* m_upload;
+		D3D12_CPU_DESCRIPTOR_HANDLE m_cpuHandle;
+		D3D12_GPU_DESCRIPTOR_HANDLE m_gpuHandle;
+		uint32_t m_incrementSize;
+		uint8_t* m_data;
+		uint32_t m_size;
+		uint32_t m_pos;
+	};
+
+	// Hands out 16-bit handles that refer to descriptors in a descriptor heap.
+	// Only the declaration lives in this header; create/destroy/alloc/free/get
+	// are defined elsewhere.
+	// NOTE(review): each handle presumably covers m_numDescriptorsPerBlock
+	// consecutive descriptors -- confirm against the definitions.
+	class DescriptorAllocator
+	{
+	public:
+		// Only m_numDescriptorsPerBlock is initialized here; the heap, handle
+		// allocator and descriptor handles stay unset until create().
+		DescriptorAllocator()
+			: m_numDescriptorsPerBlock(1)
+		{
+		}
+
+		// The destructor releases nothing; see destroy().
+		~DescriptorAllocator()
+		{
+		}
+
+		void create(D3D12_DESCRIPTOR_HEAP_TYPE _type, uint32_t _maxDescriptors, uint16_t _numDescriptorsPerBlock = 1);
+		void destroy();
+
+		uint16_t alloc(ID3D12Resource* _ptr, const D3D12_SHADER_RESOURCE_VIEW_DESC* _desc);
+		uint16_t alloc(const uint32_t* _flags, uint32_t _num = BGFX_CONFIG_MAX_TEXTURE_SAMPLERS);
+		void free(uint16_t _handle);
+
+		D3D12_GPU_DESCRIPTOR_HANDLE get(uint16_t _handle);
+
+		// Returns the descriptor heap pointer held by this object.
+		ID3D12DescriptorHeap* getHeap()
+		{
+			return m_heap;
+		}
+
+	private:
+		ID3D12DescriptorHeap* m_heap;
+		bx::HandleAlloc* m_handleAlloc;
+		D3D12_CPU_DESCRIPTOR_HANDLE m_cpuHandle;
+		D3D12_GPU_DESCRIPTOR_HANDLE m_gpuHandle;
+		uint32_t m_incrementSize;
+		uint16_t m_numDescriptorsPerBlock;
+	};
+
+	// Common state for a D3D12 buffer: the resource (m_ptr), an optional
+	// staging resource, the tracked resource state, the size and the creation
+	// flags. create/update/setState are defined elsewhere.
+	struct BufferD3D12
+	{
+		// m_srvd and m_uavd are not initialized by the constructor.
+		BufferD3D12()
+			: m_ptr(NULL)
+			, m_staging(NULL)
+			, m_state(D3D12_RESOURCE_STATE_COMMON)
+			, m_size(0)
+			, m_flags(BGFX_BUFFER_NONE)
+			, m_dynamic(false)
+		{
+		}
+
+		void create(uint32_t _size, void* _data, uint16_t _flags, bool _vertex);
+		void update(ID3D12GraphicsCommandList* _commandList, uint32_t _offset, uint32_t _size, void* _data, bool _discard = false);
+
+		// Releases both resources and clears the dynamic flag. Nothing happens
+		// when m_ptr is NULL, so m_staging is only released together with m_ptr.
+		void destroy()
+		{
+			if (NULL != m_ptr)
+			{
+				DX_RELEASE(m_ptr, 0);
+				DX_RELEASE(m_staging, 0);
+				m_dynamic = false;
+			}
+		}
+
+		void setState(ID3D12GraphicsCommandList* _commandList, D3D12_RESOURCE_STATES _state);
+
+		D3D12_SHADER_RESOURCE_VIEW_DESC  m_srvd;
+		D3D12_UNORDERED_ACCESS_VIEW_DESC m_uavd;
+		ID3D12Resource* m_ptr;
+		ID3D12Resource* m_staging;
+		D3D12_RESOURCE_STATES m_state;
+		uint32_t m_size;
+		uint16_t m_flags;
+		bool m_dynamic;
+	};
+
+	// A BufferD3D12 that additionally records the vertex declaration handle.
+	// This create() overload takes the declaration handle instead of the base
+	// class' `_vertex` flag; it is defined elsewhere.
+	struct VertexBufferD3D12 : public BufferD3D12
+	{
+		void create(uint32_t _size, void* _data, VertexDeclHandle _declHandle, uint16_t _flags);
+
+		VertexDeclHandle m_decl;
+	};
+
+	// Holds one shader: its byte code (m_code), an optional constant buffer,
+	// the predefined uniforms it uses, a per-attribute mask and a hash.
+	// create() and getShaderCode() are defined elsewhere.
+	struct ShaderD3D12
+	{
+		// All scalar members start at zero/NULL, including m_size, so that a
+		// shader object that was never created holds no indeterminate values.
+		// m_predefined and m_attrMask are left for create() to fill in.
+		ShaderD3D12()
+			: m_code(NULL)
+			, m_constantBuffer(NULL)
+			, m_hash(0)
+			, m_numUniforms(0)
+			, m_size(0)
+			, m_numPredefined(0)
+		{
+		}
+
+		void create(const Memory* _mem);
+		DWORD* getShaderCode(uint8_t _fragmentBit, const Memory* _mem);
+
+		// Destroys the constant buffer (if any), resets the predefined uniform
+		// count, and releases the byte code (if any) together with its hash.
+		void destroy()
+		{
+			if (NULL != m_constantBuffer)
+			{
+				ConstantBuffer::destroy(m_constantBuffer);
+				m_constantBuffer = NULL;
+			}
+
+			m_numPredefined = 0;
+
+			if (NULL != m_code)
+			{
+				release(m_code);
+				m_code = NULL;
+				m_hash = 0;
+			}
+		}
+
+		const Memory* m_code;
+		ConstantBuffer* m_constantBuffer;
+
+		PredefinedUniform m_predefined[PredefinedUniform::Count];
+		uint8_t m_attrMask[Attrib::Count];
+
+		uint32_t m_hash;
+		uint16_t m_numUniforms;
+		uint16_t m_size;
+		uint8_t m_numPredefined;
+	};
+
+	// Links a vertex shader with an optional fragment shader and keeps the
+	// concatenated list of predefined uniforms of both.
+	struct ProgramD3D12
+	{
+		// m_numPredefined starts at zero so that a program that was never
+		// created reports an empty predefined-uniform list.
+		ProgramD3D12()
+			: m_vsh(NULL)
+			, m_fsh(NULL)
+			, m_numPredefined(0)
+		{
+		}
+
+		// Stores the shader pointers (no ownership is taken) and copies the
+		// predefined uniforms: vertex shader entries first, fragment shader
+		// entries appended after them. _vsh must be non-NULL; _fsh may be NULL.
+		void create(const ShaderD3D12* _vsh, const ShaderD3D12* _fsh)
+		{
+			BX_CHECK(NULL != _vsh->m_code, "Vertex shader doesn't exist.");
+			m_vsh = _vsh;
+			memcpy(&m_predefined[0], _vsh->m_predefined, _vsh->m_numPredefined*sizeof(PredefinedUniform));
+			m_numPredefined = _vsh->m_numPredefined;
+
+			if (NULL != _fsh)
+			{
+				BX_CHECK(NULL != _fsh->m_code, "Fragment shader doesn't exist.");
+				m_fsh = _fsh;
+				memcpy(&m_predefined[m_numPredefined], _fsh->m_predefined, _fsh->m_numPredefined*sizeof(PredefinedUniform));
+				m_numPredefined += _fsh->m_numPredefined;
+			}
+		}
+
+		// Forgets both shaders; the shaders themselves are not destroyed.
+		void destroy()
+		{
+			m_numPredefined = 0;
+			m_vsh = NULL;
+			m_fsh = NULL;
+		}
+
+		const ShaderD3D12* m_vsh;
+		const ShaderD3D12* m_fsh;
+
+		// Sized for the predefined uniforms of two shaders.
+		PredefinedUniform m_predefined[PredefinedUniform::Count * 2];
+		uint8_t m_numPredefined;
+	};
+
+	// State for one D3D12 texture: the resource, an optional staging resource,
+	// the tracked resource state, view descriptions and format information.
+	// All member functions are defined elsewhere.
+	struct TextureD3D12
+	{
+		// Texture kind; stored in m_type.
+		enum Enum
+		{
+			Texture2D,
+			Texture3D,
+			TextureCube,
+		};
+
+		// Only the pointers, the state and the mip count are initialized here;
+		// the view descriptions, flags, sampler index, type and formats are
+		// left unset by the constructor.
+		TextureD3D12()
+			: m_ptr(NULL)
+			, m_staging(NULL)
+			, m_state(D3D12_RESOURCE_STATE_COMMON)
+			, m_numMips(0)
+		{
+		}
+
+		void create(const Memory* _mem, uint32_t _flags, uint8_t _skip);
+		void destroy();
+		void update(ID3D12GraphicsCommandList* _commandList, uint8_t _side, uint8_t _mip, const Rect& _rect, uint16_t _z, uint16_t _depth, uint16_t _pitch, const Memory* _mem);
+		void commit(uint8_t _stage, uint32_t _flags = BGFX_SAMPLER_DEFAULT_FLAGS);
+		void resolve();
+		void setState(ID3D12GraphicsCommandList* _commandList, D3D12_RESOURCE_STATES _state);
+
+		D3D12_SHADER_RESOURCE_VIEW_DESC  m_srvd;
+		D3D12_UNORDERED_ACCESS_VIEW_DESC m_uavd;
+		ID3D12Resource* m_ptr;
+		ID3D12Resource* m_staging;
+		D3D12_RESOURCE_STATES m_state;
+		uint32_t m_flags;
+		uint16_t m_samplerIdx;
+		uint8_t m_type;
+		uint8_t m_requestedFormat;
+		uint8_t m_textureFormat;
+		uint8_t m_numMips;
+	};
+
+	// State for one frame buffer: attached texture handles, a depth texture
+	// handle and an optional swap chain. Two create() overloads exist, one
+	// taking texture handles and one taking a native window handle; all member
+	// functions are defined elsewhere.
+	struct FrameBufferD3D12
+	{
+		// Starts with no swap chain, an invalid dense index and depth handle,
+		// and zero attachments. The m_texture and m_th arrays are not
+		// initialized here.
+		FrameBufferD3D12()
+			: m_swapChain(NULL)
+			, m_denseIdx(UINT16_MAX)
+			, m_num(0)
+			, m_numTh(0)
+		{
+			m_depth.idx = bgfx::invalidHandle;
+		}
+
+		void create(uint8_t _num, const TextureHandle* _handles);
+		void create(uint16_t _denseIdx, void* _nwh, uint32_t _width, uint32_t _height, TextureFormat::Enum _depthFormat);
+		uint16_t destroy();
+		void preReset();
+		void postReset();
+		void resolve();
+		void clear(ID3D12GraphicsCommandList* _commandList, const Clear& _clear, const float _palette[][4], const D3D12_RECT* _rect = NULL, uint32_t _num = 0);
+
+		TextureHandle m_texture[BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS];
+		TextureHandle m_depth;
+		IDXGISwapChain* m_swapChain;
+		uint16_t m_denseIdx;
+		uint8_t m_num;
+		uint8_t m_numTh;
+		TextureHandle m_th[BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS];
+	};
+
+	// Owns a direct D3D12 command queue, one fence, and a fixed ring of four
+	// command list / allocator pairs. m_control tracks which ring slots are
+	// being recorded (m_current) and which are waiting to complete (m_read).
+	struct CommandQueue
+	{
+		// Sizes the ring control to the number of command list slots. The D3D12
+		// objects, m_currentFence and m_completedFence are not initialized
+		// here; see init().
+		CommandQueue()
+			: m_control(BX_COUNTOF(m_commandList) )
+		{
+		}
+
+		// Creates the queue, the fence (initial value 0) and, for every ring
+		// slot, an allocator plus a command list. Each list is closed right
+		// after creation; alloc() calls Reset() on it before handing it out.
+		void init(ID3D12Device* _device)
+		{
+			D3D12_COMMAND_QUEUE_DESC queueDesc;
+			queueDesc.Type     = D3D12_COMMAND_LIST_TYPE_DIRECT;
+			queueDesc.Priority = 0;
+			queueDesc.Flags    = D3D12_COMMAND_QUEUE_FLAG_NONE;
+			queueDesc.NodeMask = 1;
+			DX_CHECK(_device->CreateCommandQueue(&queueDesc
+					, __uuidof(ID3D12CommandQueue)
+					, (void**)&m_commandQueue
+					) );
+
+			m_currentFence = 0;
+			DX_CHECK(_device->CreateFence(0
+					, D3D12_FENCE_FLAG_NONE
+					, __uuidof(ID3D12Fence)
+					, (void**)&m_fence
+					) );
+
+			for (uint32_t ii = 0; ii < BX_COUNTOF(m_commandList); ++ii)
+			{
+				DX_CHECK(_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT
+						, __uuidof(ID3D12CommandAllocator)
+						, (void**)&m_commandList[ii].m_commandAllocator
+						) );
+
+				DX_CHECK(_device->CreateCommandList(0
+						, D3D12_COMMAND_LIST_TYPE_DIRECT
+						, m_commandList[ii].m_commandAllocator
+						, NULL
+						, __uuidof(ID3D12GraphicsCommandList)
+						, (void**)&m_commandList[ii].m_commandList
+						) );
+
+				DX_CHECK(m_commandList[ii].m_commandList->Close() );
+			}
+		}
+
+		// Waits for all outstanding work via finish(), then releases the fence,
+		// every allocator / list pair and finally the queue.
+		void shutdown()
+		{
+			finish();
+
+			DX_RELEASE(m_fence, 0);
+
+			for (uint32_t ii = 0; ii < BX_COUNTOF(m_commandList); ++ii)
+			{
+				DX_RELEASE(m_commandList[ii].m_commandAllocator, 0);
+				DX_RELEASE(m_commandList[ii].m_commandList, 0);
+			}
+
+			DX_RELEASE(m_commandQueue, 0);
+		}
+
+		// Returns the command list of the current ring slot, reset and ready
+		// for recording. While reserve(1) yields 0 the oldest submitted slot is
+		// waited on and consumed.
+		// NOTE(review): presumably reserve(1) returns 0 when the ring is full --
+		// confirm against bx::RingBufferControl.
+		ID3D12GraphicsCommandList* alloc()
+		{
+			while (0 == m_control.reserve(1) )
+			{
+				CommandList& commandList = m_commandList[m_control.m_read];
+				WaitForSingleObject(commandList.m_event, INFINITE);
+				// Unlike finish(), the closed handle is not reset to NULL here;
+				// kick() overwrites m_event when the slot is submitted again.
+				CloseHandle(commandList.m_event);
+
+				m_control.consume(1);
+			}
+
+			CommandList& commandList = m_commandList[m_control.m_current];
+			DX_CHECK(commandList.m_commandList->Reset(commandList.m_commandAllocator, NULL) );
+			return commandList.m_commandList;
+		}
+
+		// Closes and submits the current command list, creates an event for it,
+		// signals the fence with the next fence value and arms the event on
+		// that value. Returns the fence value used for this submission.
+		uint64_t kick()
+		{
+			CommandList& commandList = m_commandList[m_control.m_current];
+			DX_CHECK(commandList.m_commandList->Close() );
+
+			ID3D12CommandList* commandLists[] = { commandList.m_commandList };
+			m_commandQueue->ExecuteCommandLists(BX_COUNTOF(commandLists), commandLists);
+
+			commandList.m_event = CreateEventExA(NULL, NULL, 0, EVENT_ALL_ACCESS);
+			// Post-increment: the first submission after init() uses fence
+			// value 0, which is also the value the fence was created with.
+			// NOTE(review): verify that the event for that first submission
+			// cannot be signalled before the GPU work completes.
+			const uint64_t fence = m_currentFence++;
+			m_commandQueue->Signal(m_fence, fence);
+			m_fence->SetEventOnCompletion(fence, commandList.m_event);
+
+			m_control.commit(1);
+
+			return fence;
+		}
+
+		// Waits for submitted command lists in submission order. Returns early
+		// once the fence's completed value has reached _waitFence; with the
+		// default argument every pending submission is waited on.
+		void finish(uint64_t _waitFence = UINT64_MAX)
+		{
+			while (0 < m_control.available() )
+			{
+				CommandList& commandList = m_commandList[m_control.m_read];
+				WaitForSingleObject(commandList.m_event, INFINITE);
+				CloseHandle(commandList.m_event);
+				commandList.m_event = NULL;
+				// m_completedFence is only ever assigned here.
+				m_completedFence = m_fence->GetCompletedValue();
+				m_commandQueue->Wait(m_fence, m_completedFence);
+
+				m_control.consume(1);
+
+				if (_waitFence <= m_completedFence)
+				{
+					return;
+				}
+			}
+
+			BX_CHECK(0 == m_control.available(), "");
+		}
+
+		// One ring slot: a command list, its allocator, and the event armed by
+		// kick() for the slot's most recent submission.
+		struct CommandList
+		{
+			ID3D12GraphicsCommandList* m_commandList;
+			ID3D12CommandAllocator* m_commandAllocator;
+			HANDLE m_event;
+		};
+
+		ID3D12CommandQueue* m_commandQueue;
+		uint64_t m_currentFence;
+		uint64_t m_completedFence;
+		ID3D12Fence* m_fence;
+		CommandList m_commandList[4];
+		bx::RingBufferControl m_control;
+	};
+
+} /* namespace d3d12 */ } // namespace bgfx
+
+#endif // BGFX_RENDERER_D3D12_H_HEADER_GUARD

+ 748 - 0
src/shader_dx9bc.cpp

@@ -0,0 +1,748 @@
+/*
+ * Copyright 2011-2015 Branimir Karadzic. All rights reserved.
+ * License: http://www.opensource.org/licenses/BSD-2-Clause
+ */
+
+#include "bgfx_p.h"
+#include "shader_dx9bc.h"
+
+BX_PRAGMA_DIAGNOSTIC_IGNORED_CLANG_GCC("-Wunused-parameter");
+BX_PRAGMA_DIAGNOSTIC_IGNORED_CLANG("-Wunneeded-internal-declaration");
+
+namespace bgfx
+{
+	// Per-opcode decoding information: how many operand tokens and how many
+	// 32-bit immediate values follow the instruction token.
+	struct Dx9bcOpcodeInfo
+	{
+		uint8_t numOperands;
+		uint8_t numValues;
+	};
+
+	// Operand / value counts, indexed by Dx9bcOpcode::Enum. The entries
+	// commented "0".."14" fill the unassigned opcode values between DEFI and
+	// TEXCOORD so that the index always equals the opcode value; the static
+	// assert below keeps the table in step with the enum.
+	// NOTE(review): several entries list a single operand (e.g. SUB, LIT, DST,
+	// M4X4, CRS, SGN, ABS); these look like placeholders -- verify against the
+	// Direct3D 9 shader instruction reference before relying on them.
+	static const Dx9bcOpcodeInfo s_dx9bcOpcodeInfo[] =
+	{
+		{ 0, 0 }, // NOP
+		{ 2, 0 }, // MOV
+		{ 3, 0 }, // ADD
+		{ 1, 0 }, // SUB
+		{ 4, 0 }, // MAD
+		{ 3, 0 }, // MUL
+		{ 2, 0 }, // RCP
+		{ 2, 0 }, // RSQ
+		{ 3, 0 }, // DP3
+		{ 3, 0 }, // DP4
+		{ 3, 0 }, // MIN
+		{ 3, 0 }, // MAX
+		{ 3, 0 }, // SLT
+		{ 3, 0 }, // SGE
+		{ 2, 0 }, // EXP
+		{ 2, 0 }, // LOG
+		{ 1, 0 }, // LIT
+		{ 1, 0 }, // DST
+		{ 4, 0 }, // LRP
+		{ 2, 0 }, // FRC
+		{ 1, 0 }, // M4X4
+		{ 1, 0 }, // M4X3
+		{ 1, 0 }, // M3X4
+		{ 1, 0 }, // M3X3
+		{ 1, 0 }, // M3X2
+		{ 0, 0 }, // CALL
+		{ 0, 0 }, // CALLNZ
+		{ 0, 0 }, // LOOP
+		{ 0, 0 }, // RET
+		{ 0, 0 }, // ENDLOOP
+		{ 0, 0 }, // LABEL
+		{ 1, 1 }, // DCL
+		{ 3, 0 }, // POW
+		{ 1, 0 }, // CRS
+		{ 1, 0 }, // SGN
+		{ 1, 0 }, // ABS
+		{ 2, 0 }, // NRM
+		{ 4, 0 }, // SINCOS
+		{ 1, 0 }, // REP
+		{ 0, 0 }, // ENDREP
+		{ 1, 0 }, // IF
+		{ 2, 0 }, // IFC
+		{ 0, 0 }, // ELSE
+		{ 0, 0 }, // ENDIF
+		{ 0, 0 }, // BREAK
+		{ 2, 0 }, // BREAKC
+		{ 2, 0 }, // MOVA
+		{ 1, 4 }, // DEFB
+		{ 1, 4 }, // DEFI
+		{ 0, 0 }, // 0
+		{ 0, 0 }, // 1
+		{ 0, 0 }, // 2
+		{ 0, 0 }, // 3
+		{ 0, 0 }, // 4
+		{ 0, 0 }, // 5
+		{ 0, 0 }, // 6
+		{ 0, 0 }, // 7
+		{ 0, 0 }, // 8
+		{ 0, 0 }, // 9
+		{ 0, 0 }, // 10
+		{ 0, 0 }, // 11
+		{ 0, 0 }, // 12
+		{ 0, 0 }, // 13
+		{ 0, 0 }, // 14
+		{ 1, 0 }, // TEXCOORD
+		{ 1, 0 }, // TEXKILL
+		{ 3, 0 }, // TEX
+		{ 1, 0 }, // TEXBEM
+		{ 1, 0 }, // TEXBEM1
+		{ 1, 0 }, // TEXREG2AR
+		{ 1, 0 }, // TEXREG2GB
+		{ 1, 0 }, // TEXM3X2PAD
+		{ 1, 0 }, // TEXM3X2TEX
+		{ 1, 0 }, // TEXM3X3PAD
+		{ 1, 0 }, // TEXM3X3TEX
+		{ 1, 0 }, // TEXM3X3DIFF
+		{ 1, 0 }, // TEXM3X3SPEC
+		{ 1, 0 }, // TEXM3X3VSPEC
+		{ 2, 0 }, // EXPP
+		{ 2, 0 }, // LOGP
+		{ 4, 0 }, // CND
+		{ 1, 4 }, // DEF
+		{ 1, 0 }, // TEXREG2RGB
+		{ 1, 0 }, // TEXDP3TEX
+		{ 1, 0 }, // TEXM3X2DEPTH
+		{ 1, 0 }, // TEXDP3
+		{ 1, 0 }, // TEXM3X3
+		{ 1, 0 }, // TEXDEPTH
+		{ 4, 0 }, // CMP
+		{ 1, 0 }, // BEM
+		{ 4, 0 }, // DP2ADD
+		{ 2, 0 }, // DSX
+		{ 2, 0 }, // DSY
+		{ 5, 0 }, // TEXLDD
+		{ 1, 0 }, // SETP
+		{ 3, 0 }, // TEXLDL
+		{ 0, 0 }, // BREAKP
+	};
+	BX_STATIC_ASSERT(BX_COUNTOF(s_dx9bcOpcodeInfo) == Dx9bcOpcode::Count);
+
+	// Lower-case mnemonics, indexed by Dx9bcOpcode::Enum. The run of NULL
+	// entries covers the unassigned opcode values between DEFI and TEXCOORD,
+	// so a lookup for one of those values yields NULL rather than a string.
+	static const char* s_dx9bcOpcode[] =
+	{
+		"nop",
+		"mov",
+		"add",
+		"sub",
+		"mad",
+		"mul",
+		"rcp",
+		"rsq",
+		"dp3",
+		"dp4",
+		"min",
+		"max",
+		"slt",
+		"sge",
+		"exp",
+		"log",
+		"lit",
+		"dst",
+		"lrp",
+		"frc",
+		"m4x4",
+		"m4x3",
+		"m3x4",
+		"m3x3",
+		"m3x2",
+		"call",
+		"callnz",
+		"loop",
+		"ret",
+		"endloop",
+		"label",
+		"dcl",
+		"pow",
+		"crs",
+		"sgn",
+		"abs",
+		"nrm",
+		"sincos",
+		"rep",
+		"endrep",
+		"if",
+		"ifc",
+		"else",
+		"endif",
+		"break",
+		"breakc",
+		"mova",
+		"defb",
+		"defi",
+
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+
+		"texcoord",
+		"texkill",
+		"tex",
+		"texbem",
+		"texbem1",
+		"texreg2ar",
+		"texreg2gb",
+		"texm3x2pad",
+		"texm3x2tex",
+		"texm3x3pad",
+		"texm3x3tex",
+		"texm3x3diff",
+		"texm3x3spec",
+		"texm3x3vspec",
+		"expp",
+		"logp",
+		"cnd",
+		"def",
+		"texreg2rgb",
+		"texdp3tex",
+		"texm3x2depth",
+		"texdp3",
+		"texm3x3",
+		"texdepth",
+		"cmp",
+		"bem",
+		"dp2add",
+		"dsx",
+		"dsy",
+		"texldd",
+		"setp",
+		"texldl",
+		"breakp",
+	};
+	BX_STATIC_ASSERT(BX_COUNTOF(s_dx9bcOpcode) == Dx9bcOpcode::Count);
+
+	// Looks up the mnemonic for _opcode in s_dx9bcOpcode. The opcode must be
+	// below Dx9bcOpcode::Count (checked in builds where BX_CHECK is active);
+	// the result is NULL for the unassigned values inside the table.
+	const char* getName(Dx9bcOpcode::Enum _opcode)
+	{
+		BX_CHECK(_opcode < Dx9bcOpcode::Count, "Unknown opcode id %d (%x).", _opcode, _opcode);
+
+		const char* const mnemonic = s_dx9bcOpcode[_opcode];
+		return mnemonic;
+	}
+
+	// Register-file prefixes used when printing operands, indexed by
+	// Dx9bcOperandType::Enum; the static assert keeps table and enum in step.
+	static const char* s_dx9bcOperandType[] =
+	{
+		"r",           // Temporary Register File
+		"v",           // Input Register File
+		"c",           // Constant Register File
+		"t",           // Texture Register File (PS)
+		"oPos",        // Rasterizer Register File
+		"oD",          // Attribute Output Register File
+		"oT",          // Texture Coordinate Output Register File
+		"output",      // Output register file for VS3.0+
+		"i",           // Constant Integer Vector Register File
+		"oColor",      // Color Output Register File
+		"oDepth",      // Depth Output Register File
+		"s",           // Sampler State Register File
+		"c",           // Constant Register File  2048 - 4095
+		"c",           // Constant Register File  4096 - 6143
+		"c",           // Constant Register File  6144 - 8191
+		"b",           // Constant Boolean register file
+		"aL",          // Loop counter register file
+		"tempfloat16", // 16-bit float temp register file
+		"misctype",    // Miscellaneous (single) registers.
+		"label",       // Label
+		"p",           // Predicate register
+	};
+	BX_STATIC_ASSERT(BX_COUNTOF(s_dx9bcOperandType) == Dx9bcOperandType::Count);
+
+	// Usage names printed for DCL instructions, indexed by
+	// Dx9bcDeclUsage::Enum. The table has 14 entries, fewer than the 16 values
+	// a 4-bit usage field can hold.
+	static const char* s_dx9bcDeclUsage[] =
+	{
+		"position",
+		"blendweight",
+		"blendindices",
+		"normal",
+		"psize",
+		"texcoord",
+		"tangent",
+		"binormal",
+		"tessfactor",
+		"positiont",
+		"color",
+		"fog",
+		"depth",
+		"sample",
+	};
+	BX_STATIC_ASSERT(BX_COUNTOF(s_dx9bcDeclUsage) == Dx9bcDeclUsage::Count);
+
+	// Reads one 32-bit token and decodes it into a relative-addressing
+	// sub-operand. Returns the number of bytes read.
+	int32_t read(bx::ReaderI* _reader, Dx9bcSubOperand& _subOperand)
+	{
+		uint32_t raw;
+		const int32_t numBytes = bx::read(_reader, raw);
+
+		// The register type is split over two bit-fields of the token: its low
+		// three bits sit at bits 28-30, its next two bits at bits 11-12.
+		const uint32_t typeLo = (raw & UINT32_C(0x70000000) ) >> 28;
+		const uint32_t typeHi = (raw & UINT32_C(0x00001800) ) >>  8;
+
+		_subOperand.type        = Dx9bcOperandType::Enum(typeLo | typeHi);
+		_subOperand.regIndex    = raw & UINT32_C(0x000007ff);
+		_subOperand.swizzleBits = (raw & UINT32_C(0x00ff0000) ) >> 16;
+
+		return numBytes;
+	}
+
+	// Packs a relative-addressing sub-operand into one 32-bit token and writes
+	// it. Returns the number of bytes written.
+	int32_t write(bx::WriterI* _writer, const Dx9bcSubOperand& _subOperand)
+	{
+		// Mirror of the layout decoded by the matching read(): register type
+		// split over bits 28-30 and 11-12, register index in bits 0-10, swizzle
+		// in bits 16-23.
+		const uint32_t typeLo  = (_subOperand.type        << 28) & UINT32_C(0x70000000);
+		const uint32_t typeHi  = (_subOperand.type        <<  8) & UINT32_C(0x00001800);
+		const uint32_t index   =  _subOperand.regIndex           & UINT32_C(0x000007ff);
+		const uint32_t swizzle = (_subOperand.swizzleBits << 16) & UINT32_C(0x00ff0000);
+
+		const uint32_t packed = typeLo | typeHi | index | swizzle;
+		return bx::write(_writer, packed);
+	}
+
+	// Reads one operand token (plus a sub-operand token when relative
+	// addressing is used) and decodes it. _operand.destination must be set by
+	// the caller beforehand: it selects whether the token is decoded as a
+	// destination or as a source parameter. Returns the number of bytes read.
+	int32_t read(bx::ReaderI* _reader, Dx9bcOperand& _operand)
+	{
+		uint32_t raw;
+		int32_t numBytes = bx::read(_reader, raw);
+
+		// Fields shared by destination and source tokens.
+		const uint32_t typeLo = (raw & UINT32_C(0x70000000) ) >> 28;
+		const uint32_t typeHi = (raw & UINT32_C(0x00001800) ) >>  8;
+
+		_operand.type     = Dx9bcOperandType::Enum(typeLo | typeHi);
+		_operand.regIndex = raw & UINT32_C(0x000007ff);
+		_operand.addrMode = Dx9bcOperandAddrMode::Enum( (raw & UINT32_C(0x00002000) ) >> 13);
+
+		if (!_operand.destination)
+		{
+			// Source Parameter Token
+			// https://msdn.microsoft.com/en-us/library/ff569716%28v=vs.85%29.aspx
+
+			_operand.writeMask        = 0;
+			_operand.saturate         = false;
+			_operand.partialPrecision = false;
+			_operand.centroid         = false;
+			_operand.swizzleBits      = (raw & UINT32_C(0x00ff0000) ) >> 16;
+		}
+		else
+		{
+			// Destination Parameter Token
+			// https://msdn.microsoft.com/en-us/library/ff552738.aspx
+			// swizzleBits is left untouched on this path.
+
+			_operand.writeMask        = (raw & UINT32_C(0x000f0000) ) >> 16;
+			_operand.saturate         = 0 != (raw & UINT32_C(0x00100000) );
+			_operand.partialPrecision = 0 != (raw & UINT32_C(0x00200000) );
+			_operand.centroid         = 0 != (raw & UINT32_C(0x00400000) );
+		}
+
+		// A relatively addressed operand is followed by one sub-operand token.
+		if (Dx9bcOperandAddrMode::Relative == _operand.addrMode)
+		{
+			numBytes += read(_reader, _operand.subOperand);
+		}
+
+		return numBytes;
+	}
+
+	// Packs an operand into one 32-bit token and writes it, followed by the
+	// sub-operand token when relative addressing is used. Only the register
+	// type, register index and addressing mode are encoded; write mask,
+	// modifiers and swizzle are not written by this function. Returns the
+	// number of bytes written.
+	int32_t write(bx::WriterI* _writer, const Dx9bcOperand& _operand)
+	{
+		const uint32_t typeLo = (_operand.type     << 28) & UINT32_C(0x70000000);
+		const uint32_t typeHi = (_operand.type     <<  8) & UINT32_C(0x00001800);
+		const uint32_t index  =  _operand.regIndex        & UINT32_C(0x000007ff);
+		const uint32_t mode   = (_operand.addrMode << 13) & UINT32_C(0x00002000);
+
+		const uint32_t packed = typeLo | typeHi | index | mode;
+		int32_t numBytes = bx::write(_writer, packed);
+
+		if (Dx9bcOperandAddrMode::Relative != _operand.addrMode)
+		{
+			return numBytes;
+		}
+
+		numBytes += write(_writer, _operand.subOperand);
+		return numBytes;
+	}
+
+	// Reads one instruction: the instruction token, then its operands and
+	// immediate values as described by s_dx9bcOpcodeInfo. Returns the number of
+	// bytes consumed.
+	//
+	// Opcodes at or above Dx9bcOpcode::Count (Phase, Comment, End, unknown
+	// values) carry no decoded operands: numOperands and numValues are set to
+	// zero, and for Comment the payload tokens are skipped.
+	//
+	// The first operand of every instruction is decoded as a destination
+	// parameter, all following operands as source parameters.
+	int32_t read(bx::ReaderI* _reader, Dx9bcInstruction& _instruction)
+	{
+		int32_t size = 0;
+
+		uint32_t token;
+		size += bx::read(_reader, token);
+
+		_instruction.opcode = Dx9bcOpcode::Enum( (token & UINT32_C(0x0000ffff) ) );
+
+		// Give the counts a defined value on every path; callers receive the
+		// instruction even when it returns early below.
+		_instruction.numOperands = 0;
+		_instruction.numValues   = 0;
+
+		if (Dx9bcOpcode::Comment == _instruction.opcode)
+		{
+			// Comment tokens use a wider, 15-bit length field.
+			_instruction.specific   = 0;
+			_instruction.length     = uint16_t( (token & UINT32_C(0x7fff0000) ) >> 16) + 1;
+			_instruction.predicated = false;
+			_instruction.coissue    = false;
+		}
+		else
+		{
+			_instruction.specific   = uint8_t( (token & UINT32_C(0x00ff0000) ) >> 16);
+			_instruction.length     = uint8_t( (token & UINT32_C(0x0f000000) ) >> 24) + 1;
+			_instruction.predicated =     0 != (token & UINT32_C(0x10000000) );
+			_instruction.coissue    =     0 != (token & UINT32_C(0x40000000) );
+		}
+
+		if (Dx9bcOpcode::Count <= _instruction.opcode)
+		{
+			if (Dx9bcOpcode::Comment == _instruction.opcode)
+			{
+				// Skip the comment payload.
+				for (int32_t ii = 0, num = _instruction.length-1; ii < num; ++ii)
+				{
+					uint32_t tmp;
+					size += bx::read(_reader, tmp);
+				}
+			}
+
+			return size;
+		}
+
+		// The opcode is known to be below Count here, so it indexes the table
+		// directly.
+		const Dx9bcOpcodeInfo& info = s_dx9bcOpcodeInfo[_instruction.opcode];
+		_instruction.numOperands = info.numOperands;
+		_instruction.numValues   = info.numValues;
+
+		switch (_instruction.opcode)
+		{
+		case Dx9bcOpcode::SINCOS:
+			// The short encoding of SINCOS carries two operands instead of four.
+			if (5 > _instruction.length)
+			{
+				_instruction.numOperands = 2;
+			}
+			break;
+
+		default:
+			break;
+		};
+
+		// DCL stores its value token ahead of the operand; every other opcode
+		// with values stores them after the operands.
+		const bool valuesBeforeOpcode = false
+				|| Dx9bcOpcode::DCL == _instruction.opcode
+				;
+
+		if (valuesBeforeOpcode
+		&&  0 < info.numValues)
+		{
+			size += read(_reader, _instruction.value, info.numValues*sizeof(uint32_t) );
+		}
+
+		// Mark every operand slot before decoding: the operand reader branches
+		// on `destination`, so it must never be left indeterminate.
+		for (uint32_t ii = 0; ii < BX_COUNTOF(_instruction.operand); ++ii)
+		{
+			_instruction.operand[ii].destination = (0 == ii);
+		}
+
+		if (BX_COUNTOF(_instruction.operand) < _instruction.numOperands)
+		{
+			BX_CHECK(false, "Instruction %s with invalid number of operands %d (numValues %d)."
+					, getName(_instruction.opcode)
+					, _instruction.numOperands
+					, info.numValues
+					);
+			return size;
+		}
+
+		for (uint32_t ii = 0; ii < _instruction.numOperands; ++ii)
+		{
+			size += read(_reader, _instruction.operand[ii]);
+		}
+
+		if (!valuesBeforeOpcode
+		&&  0 < info.numValues)
+		{
+			size += read(_reader, _instruction.value, info.numValues*sizeof(uint32_t) );
+		}
+
+		return size;
+	}
+
+	// Writes the instruction token followed by the instruction's operands and
+	// returns the number of bytes written.
+	//
+	// Only opcode, the opcode-specific bits and the 4-bit length are packed
+	// into the instruction token; the predicated / co-issue flags and the
+	// immediate values of the instruction are not written by this function.
+	int32_t write(bx::WriterI* _writer, const Dx9bcInstruction& _instruction)
+	{
+		int32_t size = 0;
+
+		uint32_t token = 0;
+		token |=    _instruction.opcode             & UINT32_C(0x0000ffff);
+		token |=   (_instruction.specific    << 16) & UINT32_C(0x00ff0000);
+		token |= ( (_instruction.length - 1) << 24) & UINT32_C(0x0f000000);
+		size += bx::write(_writer, token);
+
+		// Deliberate fall-through: entering at case N writes operands 0..N-1
+		// in order.
+		uint32_t currOp = 0;
+		switch (_instruction.numOperands)
+		{
+		case 6: size += write(_writer, _instruction.operand[currOp++]);
+		case 5: size += write(_writer, _instruction.operand[currOp++]);
+		case 4: size += write(_writer, _instruction.operand[currOp++]);
+		case 3: size += write(_writer, _instruction.operand[currOp++]);
+		case 2: size += write(_writer, _instruction.operand[currOp++]);
+		case 1: size += write(_writer, _instruction.operand[currOp++]);
+		case 0:
+			break;
+
+		default:
+			break;
+		}
+
+		// Report what was actually written, like every other write() overload.
+		return size;
+	}
+
+	// Formats _instruction as text into _out (at most _size bytes) and returns
+	// the accumulated length reported by bx::snprintf.
+	//
+	// Opcodes at or above Dx9bcOpcode::Count (Comment, Phase, End, unknown
+	// values) have no table entries and are printed as "// <hex opcode>".
+	// Table lookups whose index comes from decoded bit-fields are range
+	// checked; "???" is printed for values without a name.
+	int32_t toString(char* _out, int32_t _size, const Dx9bcInstruction& _instruction)
+	{
+		int32_t size = 0;
+
+		if (Dx9bcOpcode::Count <= _instruction.opcode)
+		{
+			size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
+						, "// %x"
+						, _instruction.opcode
+						);
+			return size;
+		}
+
+		// Unassigned opcode values inside the table have a NULL mnemonic.
+		const char* opcodeName = getName(_instruction.opcode);
+
+		size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
+							, "%2d %s"
+							, _instruction.opcode
+							, NULL != opcodeName ? opcodeName : "???"
+							);
+
+		switch (_instruction.opcode)
+		{
+		case Dx9bcOpcode::DCL:
+			{
+				// The usage field is 4 bits wide but fewer usages are named.
+				const uint32_t usage = _instruction.value[0] & UINT32_C(0x0000000f);
+				const char* usageName = usage < BX_COUNTOF(s_dx9bcDeclUsage)
+					? s_dx9bcDeclUsage[usage]
+					: "???"
+					;
+
+				size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
+						, "_%s%d (%d, %d, %d, %d)"
+						, usageName
+						, (_instruction.value[0] & UINT32_C(0x000f0000) )>>16
+						, (_instruction.value[0] & UINT32_C(0x08000000) )>>27 // ?
+						, (_instruction.value[0] & UINT32_C(0x10000000) )>>28 // texture2d
+						, (_instruction.value[0] & UINT32_C(0x20000000) )>>29 // textureCube
+						, (_instruction.value[0] & UINT32_C(0x40000000) )>>30 // texture3d
+						);
+			}
+			break;
+
+		default:
+			break;
+		}
+
+		for (uint32_t ii = 0; ii < _instruction.numOperands; ++ii)
+		{
+			const Dx9bcOperand& operand = _instruction.operand[ii];
+
+			// The decoded register type is 5 bits wide but fewer types are named.
+			const char* typeName = uint32_t(operand.type) < BX_COUNTOF(s_dx9bcOperandType)
+				? s_dx9bcOperandType[operand.type]
+				: "???"
+				;
+
+			size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
+								, "%s%s%d"
+								, 0 == ii ? " " : ", "
+								, typeName
+								, operand.regIndex
+								);
+
+			if (operand.destination)
+			{
+				// A full (xyzw) or empty write mask is not printed.
+				if (0xf > operand.writeMask
+				&&  0   < operand.writeMask)
+				{
+					size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
+										, ".%s%s%s%s"
+										, 0 == (operand.writeMask & 1) ? "" : "x"
+										, 0 == (operand.writeMask & 2) ? "" : "y"
+										, 0 == (operand.writeMask & 4) ? "" : "z"
+										, 0 == (operand.writeMask & 8) ? "" : "w"
+										);
+				}
+			}
+			else
+			{
+				if (Dx9bcOperandAddrMode::Relative == operand.addrMode)
+				{
+					const bool array = true;
+
+					const char* subTypeName = uint32_t(operand.subOperand.type) < BX_COUNTOF(s_dx9bcOperandType)
+						? s_dx9bcOperandType[operand.subOperand.type]
+						: "???"
+						;
+
+					size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
+										, "["
+										);
+
+					size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
+										, "%s%d"
+										, subTypeName
+										, operand.subOperand.regIndex
+										);
+
+					size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
+										, "%s"
+										, array ? "]" : ""
+										);
+				}
+
+				// 0xe4 is the identity swizzle (.xyzw) and is not printed.
+				if (0xe4 != operand.swizzleBits)
+				{
+					size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
+										, ".%c%c%c%c"
+										, "xyzw"[(operand.swizzleBits   )&0x3]
+										, "xyzw"[(operand.swizzleBits>>2)&0x3]
+										, "xyzw"[(operand.swizzleBits>>4)&0x3]
+										, "xyzw"[(operand.swizzleBits>>6)&0x3]
+										);
+				}
+			}
+		}
+
+		switch (_instruction.opcode)
+		{
+		case Dx9bcOpcode::DEF:
+			for (uint32_t jj = 0; jj < _instruction.numValues; ++jj)
+			{
+				// DEF values are floats stored in the 32-bit value slots.
+				union { int32_t i; float f; } cast = { _instruction.value[jj] };
+				size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
+						, "%s%f%s"
+						, 0 == jj ? " (" : ", "
+						, cast.f
+						, uint32_t(_instruction.numValues-1) == jj ? ")" : ""
+						);
+			}
+			break;
+
+		case Dx9bcOpcode::DEFI:
+			for (uint32_t jj = 0; jj < _instruction.numValues; ++jj)
+			{
+				size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
+						, "%s%d%s"
+						, 0 == jj ? " (" : ", "
+						, _instruction.value[jj]
+						, uint32_t(_instruction.numValues-1) == jj ? ")" : ""
+						);
+			}
+			break;
+
+		default:
+			break;
+		}
+
+		return size;
+	}
+
+	// Measures the shader body by decoding instructions until the End token,
+	// then seeks back to where it started and copies that many raw bytes into
+	// _shader.byteCode. The End token itself is not part of the copied range.
+	// Returns the number of bytes copied.
+	int32_t read(bx::ReaderSeekerI* _reader, Dx9bcShader& _shader)
+	{
+		const int64_t start = bx::seek(_reader);
+		int32_t numBytes = 0;
+
+		for (;;)
+		{
+			Dx9bcInstruction instruction;
+			const int32_t length = read(_reader, instruction);
+			numBytes += length;
+
+			if (Dx9bcOpcode::End == instruction.opcode)
+			{
+				// Do not count the terminator.
+				numBytes -= length;
+				break;
+			}
+
+			if (Dx9bcOpcode::Count <= instruction.opcode)
+			{
+				// Comment / Phase / unknown: nothing to validate.
+				continue;
+			}
+
+			char temp[512];
+			toString(temp, 512, instruction);
+
+			BX_CHECK(length/4 == instruction.length
+					, "%s\nread %d, expected %d"
+					, temp
+					, length/4
+					, instruction.length
+					);
+		}
+
+		bx::seek(_reader, start, bx::Whence::Begin);
+
+		_shader.byteCode.resize(numBytes);
+		bx::read(_reader, _shader.byteCode.data(), numBytes);
+
+		return numBytes;
+	}
+
+	// Stub: writes nothing and always returns 0.
+	int32_t write(bx::WriterI* _writer, const Dx9bcShader& _shader)
+	{
+		BX_UNUSED(_writer, _shader);
+		return 0;
+	}
+
+	// Reads the version token followed by the shader body, tracing the shader
+	// kind and version. Returns the total number of bytes read.
+	int32_t read(bx::ReaderSeekerI* _reader, Dx9bc& _bc)
+	{
+		int32_t numBytes = bx::read(_reader, _bc.version);
+
+		// The upper 16 bits of the version token are 0xffff for pixel shaders;
+		// the two low bytes hold the major and minor version.
+		const bool pixelShader = (0xffff0000 == (_bc.version & 0xffff0000) );
+		const uint32_t versionMinor = _bc.version&0xff;
+		const uint32_t versionMajor = (_bc.version>>8)&0xff;
+		BX_TRACE("%s shader %d.%d"
+			, pixelShader ? "pixel" : "vertex"
+			, versionMajor
+			, versionMinor
+			);
+
+		numBytes += read(_reader, _bc.shader);
+		return numBytes;
+	}
+
+	// Stub: writes nothing and always returns 0.
+	int32_t write(bx::WriterSeekerI* _writer, const Dx9bc& _dxbc)
+	{
+		BX_UNUSED(_writer, _dxbc);
+		return 0;
+	}
+
+	// Decodes the instructions stored in _src.byteCode one after another and
+	// invokes _fn for each with the instruction's byte offset, the decoded
+	// instruction and _userData.
+	void parse(const Dx9bcShader& _src, Dx9bcParseFn _fn, void* _userData)
+	{
+		bx::MemoryReader reader(_src.byteCode.data(), uint32_t(_src.byteCode.size() ) );
+
+//BX_TRACE("parse %d", _src.byteCode.size());
+
+		const uint32_t numTokens = uint32_t(_src.byteCode.size() / sizeof(uint32_t) );
+		uint32_t token = 0;
+
+		while (token < numTokens)
+		{
+			Dx9bcInstruction instruction;
+			uint32_t bytesRead = read(&reader, instruction);
+
+			BX_CHECK(bytesRead/4 == instruction.length, "read %d, expected %d", bytesRead/4, instruction.length);
+
+			_fn(token * sizeof(uint32_t), instruction, _userData);
+
+			// Advance by the length stored in the instruction token.
+			token += instruction.length;
+		}
+	}
+
+	// Decodes every instruction of _src, lets _fn modify it, re-encodes it
+	// into a temporary memory block and finally stores the re-encoded bytes in
+	// _dst.byteCode.
+	void filter(Dx9bcShader& _dst, const Dx9bcShader& _src, Dx9bcFilterFn _fn, void* _userData)
+	{
+		bx::MemoryReader reader(_src.byteCode.data(), uint32_t(_src.byteCode.size() ) );
+
+		bx::CrtAllocator r;
+		bx::MemoryBlock mb(&r);
+		bx::MemoryWriter writer(&mb);
+
+		for (uint32_t token = 0, numTokens = uint32_t(_src.byteCode.size() / sizeof(uint32_t) ); token < numTokens;)
+		{
+			Dx9bcInstruction instruction;
+			uint32_t bytesRead = read(&reader, instruction);
+			BX_CHECK(bytesRead/4 == instruction.length, "read %d, expected %d", bytesRead/4, instruction.length);
+			BX_UNUSED(bytesRead);
+
+			_fn(instruction, _userData);
+
+			write(&writer, instruction);
+
+			token += instruction.length;
+		}
+
+		uint8_t* data = (uint8_t*)mb.more();
+		uint32_t size = uint32_t(bx::getSize(&writer) );
+
+		// resize(), not reserve(): the vector must actually contain `size`
+		// elements, otherwise byteCode.size() stays unchanged and the copy
+		// below writes outside the vector's element range.
+		_dst.byteCode.resize(size);
+		if (0 < size)
+		{
+			memcpy(_dst.byteCode.data(), data, size);
+		}
+	}
+
+} // namespace bgfx

+ 258 - 0
src/shader_dx9bc.h

@@ -0,0 +1,258 @@
+/*
+ * Copyright 2011-2015 Branimir Karadzic. All rights reserved.
+ * License: http://www.opensource.org/licenses/BSD-2-Clause
+ */
+
+#ifndef BGFX_SHADER_DX9BC_H
+#define BGFX_SHADER_DX9BC_H
+
+#include <bx/readerwriter.h>
+
+namespace bgfx
+{
+	// Opcode values as stored in the low 16 bits of an instruction token.
+	// NOP..DEFI take the values 0..48; `Unknown` is pinned to 63 so that
+	// TEXCOORD continues at 64. `Count` is one past the last table-backed
+	// opcode; Phase, Comment and End are special values above Count.
+	struct Dx9bcOpcode
+	{
+		enum Enum
+		{
+			NOP,
+			MOV,
+			ADD,
+			SUB,
+			MAD,
+			MUL,
+			RCP,
+			RSQ,
+			DP3,
+			DP4,
+			MIN,
+			MAX,
+			SLT,
+			SGE,
+			EXP,
+			LOG,
+			LIT,
+			DST,
+			LRP,
+			FRC,
+			M4X4,
+			M4X3,
+			M3X4,
+			M3X3,
+			M3X2,
+			CALL,
+			CALLNZ,
+			LOOP,
+			RET,
+			ENDLOOP,
+			LABEL,
+			DCL,
+			POW,
+			CRS,
+			SGN,
+			ABS,
+			NRM,
+			SINCOS,
+			REP,
+			ENDREP,
+			IF,
+			IFC,
+			ELSE,
+			ENDIF,
+			BREAK,
+			BREAKC,
+			MOVA,
+			DEFB,
+			DEFI,
+
+			Unknown = 63,
+			TEXCOORD,
+			TEXKILL,
+			TEX,
+			TEXBEM,
+			TEXBEM1,
+			TEXREG2AR,
+			TEXREG2GB,
+			TEXM3X2PAD,
+			TEXM3X2TEX,
+			TEXM3X3PAD,
+			TEXM3X3TEX,
+			TEXM3X3DIFF,
+			TEXM3X3SPEC,
+			TEXM3X3VSPEC,
+			EXPP,
+			LOGP,
+			CND,
+			DEF,
+			TEXREG2RGB,
+			TEXDP3TEX,
+			TEXM3X2DEPTH,
+			TEXDP3,
+			TEXM3X3,
+			TEXDEPTH,
+			CMP,
+			BEM,
+			DP2ADD,
+			DSX,
+			DSY,
+			TEXLDD,
+			SETP,
+			TEXLDL,
+			BREAKP,
+
+			Count,
+
+			Phase   = 0xfffd,
+			Comment = 0xfffe,
+			End     = 0xffff
+		};
+	};
+
+	const char* getName(Dx9bcOpcode::Enum _opcode);
+
+	// Texture resource dimensionality.
+	// NOTE(review): presumably matches the texture-type bits of a sampler DCL
+	// token -- confirm against the D3D9 shader token documentation.
+	struct Dx9bcResourceDim
+	{
+		enum Enum
+		{
+			Unknown,
+			Texture1D,
+			Texture2D,
+			TextureCube,
+			Texture3D,
+		};
+	};
+
+	// Register file of an operand, as decoded from the operand token's
+	// register-type bits. `Count` is the number of named register files.
+	struct Dx9bcOperandType
+	{
+		enum Enum
+		{
+			Temp,
+			Input,
+			Const,
+			Texture,
+			RastOut,
+			AttrOut,
+			TexCrdOut,
+			Output,
+			ConstInt,
+			ColorOut,
+			DepthOut,
+			Sampler,
+			Const2,
+			Const3,
+			Const4,
+			ConstBool,
+			Loop,
+			TempFloat16,
+			MiscType,
+			Label,
+			Predicate,
+
+			Count
+		};
+	};
+
+	// Declaration usage carried by a DCL instruction. `Count` is the number of
+	// named usages.
+	struct Dx9bcDeclUsage
+	{
+		enum Enum
+		{
+			Position,
+			BlendWeight,
+			BlendIndices,
+			Normal,
+			Psize,
+			Texcoord,
+			Tangent,
+			Binormal,
+			TessFactor,
+			PositionT,
+			Color,
+			Fog,
+			Depth,
+			Sample,
+
+			Count
+  		};
+	};
+
+	// Addressing mode of an operand, decoded from a single bit of the operand
+	// token. A `Relative` operand is followed by a sub-operand token.
+	struct Dx9bcOperandAddrMode
+	{
+		enum Enum
+		{
+			Absolute,
+			Relative,
+
+			Count
+		};
+	};
+
+	// The extra token that follows a relatively addressed operand: register
+	// file, register index and swizzle of the addressing register.
+	struct Dx9bcSubOperand
+	{
+		Dx9bcOperandType::Enum type;
+		uint32_t regIndex;
+		uint8_t swizzleBits;
+	};
+
+	// One decoded instruction operand. `destination` is an input to decoding:
+	// it selects whether the token is interpreted as a destination parameter
+	// (write mask and result modifiers) or as a source parameter (swizzle).
+	struct Dx9bcOperand
+	{
+		Dx9bcOperandType::Enum type;
+		uint32_t regIndex;
+
+		bool destination;
+
+		// Destination
+		uint8_t writeMask;
+		bool saturate;
+		bool partialPrecision;
+		bool centroid;
+
+		// Source
+		uint8_t swizzleBits;
+
+		Dx9bcOperandAddrMode::Enum addrMode;
+		// Only meaningful when addrMode is Relative.
+		Dx9bcSubOperand subOperand;
+	};
+
+	// One decoded instruction: the fields of the instruction token plus up to
+	// six operands and up to four 32-bit immediate values.
+	struct Dx9bcInstruction
+	{
+		Dx9bcOpcode::Enum opcode;
+		// Instruction length in 32-bit tokens, including the instruction token.
+		uint16_t length;
+		uint8_t numOperands;
+		uint8_t numValues;
+		uint8_t specific;
+		bool predicated;
+		bool coissue;
+
+		Dx9bcOperand operand[6];
+		int32_t value[4];
+	};
+
+	int32_t read(bx::ReaderI* _reader, Dx9bcInstruction& _instruction);
+	int32_t write(bx::WriterI* _writer, const Dx9bcInstruction& _instruction);
+	int32_t toString(char* _out, int32_t _size, const Dx9bcInstruction& _instruction);
+
+	// Raw instruction stream of a shader, stored as bytes.
+	struct Dx9bcShader
+	{
+		stl::vector<uint8_t> byteCode;
+	};
+
+	int32_t read(bx::ReaderSeekerI* _reader, Dx9bcShader& _shader);
+	int32_t write(bx::WriterI* _writer, const Dx9bcShader& _shader);
+
+	// A complete shader blob: the leading version token and the shader body.
+	struct Dx9bc
+	{
+		uint32_t version;
+		Dx9bcShader shader;
+	};
+
+	int32_t read(bx::ReaderSeekerI* _reader, Dx9bc& _dx9bc);
+	int32_t write(bx::WriterSeekerI* _writer, const Dx9bc& _dx9bc);
+
+	typedef void (*Dx9bcParseFn)(uint32_t _offset, const Dx9bcInstruction& _instruction, void* _userData);
+	void parse(const Dx9bcShader& _src, Dx9bcParseFn _fn, void* _userData);
+
+	typedef void (*Dx9bcFilterFn)(Dx9bcInstruction& _instruction, void* _userData);
+	void filter(Dx9bcShader& _dst, const Dx9bcShader& _src, Dx9bcFilterFn _fn, void* _userData);
+
+} // namespace bgfx
+
+#endif // BGFX_SHADER_DX9BC_H

+ 1851 - 0
src/shader_dxbc.cpp

@@ -0,0 +1,1851 @@
+/*
+ * Copyright 2011-2015 Branimir Karadzic. All rights reserved.
+ * License: http://www.opensource.org/licenses/BSD-2-Clause
+ */
+
+#include "bgfx_p.h"
+#include "shader_dxbc.h"
+
+namespace bgfx
+{
+	struct DxbcOpcodeInfo // Static per-opcode decode info; element type of s_dxbcOpcodeInfo.
+	{
+		uint8_t numOperands; // operand count recorded for the opcode
+		uint8_t numValues; // presumably the count of extra 32-bit values following the operands -- confirm in read(DxbcInstruction)
+	};
+
+	// Per-opcode decode metadata, indexed by DxbcOpcode::Enum. The row order must
+	// match the enum; the static assert below only verifies the row count.
+	//
+	// UDIV (destQUOT, destREM, src0, src1) and ATOMIC_CMP_STORE (dst, dstAddress,
+	// src0, src1) take four operands according to the shader model 4/5 assembly
+	// reference; listing them with three would desynchronize the token stream.
+	//
+	// NOTE(review): a number of rows are still { 0, 0 } for opcodes that the
+	// assembly reference lists with operands (e.g. CALL, LABEL, RCP,
+	// IMM_ATOMIC_*); they look like unfilled placeholders -- confirm before
+	// relying on them.
+	static const DxbcOpcodeInfo s_dxbcOpcodeInfo[] =
+	{
+		{ 3, 0 }, // ADD
+		{ 3, 0 }, // AND
+		{ 0, 0 }, // BREAK
+		{ 1, 0 }, // BREAKC
+		{ 0, 0 }, // CALL
+		{ 0, 0 }, // CALLC
+		{ 1, 0 }, // CASE
+		{ 0, 0 }, // CONTINUE
+		{ 1, 0 }, // CONTINUEC
+		{ 0, 0 }, // CUT
+		{ 0, 0 }, // DEFAULT
+		{ 2, 0 }, // DERIV_RTX
+		{ 2, 0 }, // DERIV_RTY
+		{ 1, 0 }, // DISCARD
+		{ 3, 0 }, // DIV
+		{ 3, 0 }, // DP2
+		{ 3, 0 }, // DP3
+		{ 3, 0 }, // DP4
+		{ 0, 0 }, // ELSE
+		{ 0, 0 }, // EMIT
+		{ 0, 0 }, // EMITTHENCUT
+		{ 0, 0 }, // ENDIF
+		{ 0, 0 }, // ENDLOOP
+		{ 0, 0 }, // ENDSWITCH
+		{ 3, 0 }, // EQ
+		{ 2, 0 }, // EXP
+		{ 2, 0 }, // FRC
+		{ 2, 0 }, // FTOI
+		{ 2, 0 }, // FTOU
+		{ 3, 0 }, // GE
+		{ 3, 0 }, // IADD
+		{ 1, 0 }, // IF
+		{ 3, 0 }, // IEQ
+		{ 3, 0 }, // IGE
+		{ 3, 0 }, // ILT
+		{ 4, 0 }, // IMAD
+		{ 3, 0 }, // IMAX
+		{ 3, 0 }, // IMIN
+		{ 4, 0 }, // IMUL
+		{ 3, 0 }, // INE
+		{ 2, 0 }, // INEG
+		{ 3, 0 }, // ISHL
+		{ 3, 0 }, // ISHR
+		{ 2, 0 }, // ITOF
+		{ 0, 0 }, // LABEL
+		{ 3, 0 }, // LD
+		{ 4, 0 }, // LD_MS
+		{ 2, 0 }, // LOG
+		{ 0, 0 }, // LOOP
+		{ 3, 0 }, // LT
+		{ 4, 0 }, // MAD
+		{ 3, 0 }, // MIN
+		{ 3, 0 }, // MAX
+		{ 0, 1 }, // CUSTOMDATA
+		{ 2, 0 }, // MOV
+		{ 4, 0 }, // MOVC
+		{ 3, 0 }, // MUL
+		{ 3, 0 }, // NE
+		{ 0, 0 }, // NOP
+		{ 2, 0 }, // NOT
+		{ 3, 0 }, // OR
+		{ 3, 0 }, // RESINFO
+		{ 0, 0 }, // RET
+		{ 1, 0 }, // RETC
+		{ 2, 0 }, // ROUND_NE
+		{ 2, 0 }, // ROUND_NI
+		{ 2, 0 }, // ROUND_PI
+		{ 2, 0 }, // ROUND_Z
+		{ 2, 0 }, // RSQ
+		{ 4, 0 }, // SAMPLE
+		{ 5, 0 }, // SAMPLE_C
+		{ 5, 0 }, // SAMPLE_C_LZ
+		{ 5, 0 }, // SAMPLE_L
+		{ 6, 0 }, // SAMPLE_D
+		{ 5, 0 }, // SAMPLE_B
+		{ 2, 0 }, // SQRT
+		{ 1, 0 }, // SWITCH
+		{ 3, 0 }, // SINCOS
+		{ 4, 0 }, // UDIV
+		{ 3, 0 }, // ULT
+		{ 3, 0 }, // UGE
+		{ 4, 0 }, // UMUL
+		{ 4, 0 }, // UMAD
+		{ 3, 0 }, // UMAX
+		{ 3, 0 }, // UMIN
+		{ 3, 0 }, // USHR
+		{ 2, 0 }, // UTOF
+		{ 3, 0 }, // XOR
+		{ 1, 1 }, // DCL_RESOURCE
+		{ 1, 0 }, // DCL_CONSTANT_BUFFER
+		{ 1, 0 }, // DCL_SAMPLER
+		{ 1, 1 }, // DCL_INDEX_RANGE
+		{ 1, 0 }, // DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY
+		{ 1, 0 }, // DCL_GS_INPUT_PRIMITIVE
+		{ 0, 1 }, // DCL_MAX_OUTPUT_VERTEX_COUNT
+		{ 1, 0 }, // DCL_INPUT
+		{ 1, 1 }, // DCL_INPUT_SGV
+		{ 1, 0 }, // DCL_INPUT_SIV
+		{ 1, 0 }, // DCL_INPUT_PS
+		{ 1, 1 }, // DCL_INPUT_PS_SGV
+		{ 1, 1 }, // DCL_INPUT_PS_SIV
+		{ 1, 0 }, // DCL_OUTPUT
+		{ 1, 0 }, // DCL_OUTPUT_SGV
+		{ 1, 1 }, // DCL_OUTPUT_SIV
+		{ 0, 1 }, // DCL_TEMPS
+		{ 0, 3 }, // DCL_INDEXABLE_TEMP
+		{ 0, 0 }, // DCL_GLOBAL_FLAGS
+
+		{ 0, 0 }, // InstrD3D10
+		{ 4, 0 }, // LOD
+		{ 4, 0 }, // GATHER4
+		{ 0, 0 }, // SAMPLE_POS
+		{ 0, 0 }, // SAMPLE_INFO
+
+		{ 0, 0 }, // InstrD3D10_1
+		{ 0, 0 }, // HS_DECLS
+		{ 0, 0 }, // HS_CONTROL_POINT_PHASE
+		{ 0, 0 }, // HS_FORK_PHASE
+		{ 0, 0 }, // HS_JOIN_PHASE
+		{ 0, 0 }, // EMIT_STREAM
+		{ 0, 0 }, // CUT_STREAM
+		{ 1, 0 }, // EMITTHENCUT_STREAM
+		{ 1, 0 }, // INTERFACE_CALL
+		{ 0, 0 }, // BUFINFO
+		{ 2, 0 }, // DERIV_RTX_COARSE
+		{ 2, 0 }, // DERIV_RTX_FINE
+		{ 2, 0 }, // DERIV_RTY_COARSE
+		{ 2, 0 }, // DERIV_RTY_FINE
+		{ 5, 0 }, // GATHER4_C
+		{ 5, 0 }, // GATHER4_PO
+		{ 0, 0 }, // GATHER4_PO_C
+		{ 0, 0 }, // RCP
+		{ 0, 0 }, // F32TOF16
+		{ 0, 0 }, // F16TOF32
+		{ 0, 0 }, // UADDC
+		{ 0, 0 }, // USUBB
+		{ 0, 0 }, // COUNTBITS
+		{ 0, 0 }, // FIRSTBIT_HI
+		{ 0, 0 }, // FIRSTBIT_LO
+		{ 0, 0 }, // FIRSTBIT_SHI
+		{ 0, 0 }, // UBFE
+		{ 0, 0 }, // IBFE
+		{ 5, 0 }, // BFI
+		{ 0, 0 }, // BFREV
+		{ 5, 0 }, // SWAPC
+		{ 0, 0 }, // DCL_STREAM
+		{ 1, 0 }, // DCL_FUNCTION_BODY
+		{ 0, 0 }, // DCL_FUNCTION_TABLE
+		{ 0, 0 }, // DCL_INTERFACE
+		{ 0, 0 }, // DCL_INPUT_CONTROL_POINT_COUNT
+		{ 0, 0 }, // DCL_OUTPUT_CONTROL_POINT_COUNT
+		{ 0, 0 }, // DCL_TESS_DOMAIN
+		{ 0, 0 }, // DCL_TESS_PARTITIONING
+		{ 0, 0 }, // DCL_TESS_OUTPUT_PRIMITIVE
+		{ 0, 0 }, // DCL_HS_MAX_TESSFACTOR
+		{ 0, 0 }, // DCL_HS_FORK_PHASE_INSTANCE_COUNT
+		{ 0, 0 }, // DCL_HS_JOIN_PHASE_INSTANCE_COUNT
+		{ 0, 3 }, // DCL_THREAD_GROUP
+		{ 1, 1 }, // DCL_UNORDERED_ACCESS_VIEW_TYPED
+		{ 1, 0 }, // DCL_UNORDERED_ACCESS_VIEW_RAW
+		{ 1, 1 }, // DCL_UNORDERED_ACCESS_VIEW_STRUCTURED
+		{ 1, 1 }, // DCL_THREAD_GROUP_SHARED_MEMORY_RAW
+		{ 1, 2 }, // DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED
+		{ 1, 0 }, // DCL_RESOURCE_RAW
+		{ 1, 1 }, // DCL_RESOURCE_STRUCTURED
+		{ 3, 0 }, // LD_UAV_TYPED
+		{ 3, 0 }, // STORE_UAV_TYPED
+		{ 3, 0 }, // LD_RAW
+		{ 3, 0 }, // STORE_RAW
+		{ 4, 0 }, // LD_STRUCTURED
+		{ 4, 0 }, // STORE_STRUCTURED
+		{ 3, 0 }, // ATOMIC_AND
+		{ 3, 0 }, // ATOMIC_OR
+		{ 3, 0 }, // ATOMIC_XOR
+		{ 4, 0 }, // ATOMIC_CMP_STORE
+		{ 3, 0 }, // ATOMIC_IADD
+		{ 3, 0 }, // ATOMIC_IMAX
+		{ 3, 0 }, // ATOMIC_IMIN
+		{ 3, 0 }, // ATOMIC_UMAX
+		{ 3, 0 }, // ATOMIC_UMIN
+		{ 2, 0 }, // IMM_ATOMIC_ALLOC
+		{ 2, 0 }, // IMM_ATOMIC_CONSUME
+		{ 0, 0 }, // IMM_ATOMIC_IADD
+		{ 0, 0 }, // IMM_ATOMIC_AND
+		{ 0, 0 }, // IMM_ATOMIC_OR
+		{ 0, 0 }, // IMM_ATOMIC_XOR
+		{ 0, 0 }, // IMM_ATOMIC_EXCH
+		{ 0, 0 }, // IMM_ATOMIC_CMP_EXCH
+		{ 0, 0 }, // IMM_ATOMIC_IMAX
+		{ 0, 0 }, // IMM_ATOMIC_IMIN
+		{ 0, 0 }, // IMM_ATOMIC_UMAX
+		{ 0, 0 }, // IMM_ATOMIC_UMIN
+		{ 0, 0 }, // SYNC
+		{ 3, 0 }, // DADD
+		{ 3, 0 }, // DMAX
+		{ 3, 0 }, // DMIN
+		{ 3, 0 }, // DMUL
+		{ 3, 0 }, // DEQ
+		{ 3, 0 }, // DGE
+		{ 3, 0 }, // DLT
+		{ 3, 0 }, // DNE
+		{ 2, 0 }, // DMOV
+		{ 4, 0 }, // DMOVC
+		{ 0, 0 }, // DTOF
+		{ 0, 0 }, // FTOD
+		{ 3, 0 }, // EVAL_SNAPPED
+		{ 3, 0 }, // EVAL_SAMPLE_INDEX
+		{ 2, 0 }, // EVAL_CENTROID
+		{ 0, 1 }, // DCL_GS_INSTANCE_COUNT
+		{ 0, 0 }, // ABORT
+		{ 0, 0 }, // DEBUG_BREAK
+
+		{ 0, 0 }, // InstrD3D11
+		{ 0, 0 }, // DDIV
+		{ 0, 0 }, // DFMA
+		{ 0, 0 }, // DRCP
+		{ 0, 0 }, // MSAD
+		{ 0, 0 }, // DTOI
+		{ 0, 0 }, // DTOU
+		{ 0, 0 }, // ITOD
+		{ 0, 0 }, // UTOD
+	};
+	BX_STATIC_ASSERT(BX_COUNTOF(s_dxbcOpcodeInfo) == DxbcOpcode::Count);
+	// Opcode mnemonics indexed by DxbcOpcode::Enum; the NULL entries line up with the InstrD3D10 / InstrD3D10_1 / InstrD3D11 rows of s_dxbcOpcodeInfo.
+	static const char* s_dxbcOpcode[] =
+	{
+		"add",
+		"and",
+		"break",
+		"breakc",
+		"call",
+		"callc",
+		"case",
+		"continue",
+		"continuec",
+		"cut",
+		"default",
+		"deriv_rtx",
+		"deriv_rty",
+		"discard",
+		"div",
+		"dp2",
+		"dp3",
+		"dp4",
+		"else",
+		"emit",
+		"emitthencut",
+		"endif",
+		"endloop",
+		"endswitch",
+		"eq",
+		"exp",
+		"frc",
+		"ftoi",
+		"ftou",
+		"ge",
+		"iadd",
+		"if",
+		"ieq",
+		"ige",
+		"ilt",
+		"imad",
+		"imax",
+		"imin",
+		"imul",
+		"ine",
+		"ineg",
+		"ishl",
+		"ishr",
+		"itof",
+		"label",
+		"ld",
+		"ld_ms",
+		"log",
+		"loop",
+		"lt",
+		"mad",
+		"min",
+		"max",
+		"customdata",
+		"mov",
+		"movc",
+		"mul",
+		"ne",
+		"nop",
+		"not",
+		"or",
+		"resinfo",
+		"ret",
+		"retc",
+		"round_ne",
+		"round_ni",
+		"round_pi",
+		"round_z",
+		"rsq",
+		"sample",
+		"sample_c",
+		"sample_c_lz",
+		"sample_l",
+		"sample_d",
+		"sample_b",
+		"sqrt",
+		"switch",
+		"sincos",
+		"udiv",
+		"ult",
+		"uge",
+		"umul",
+		"umad",
+		"umax",
+		"umin",
+		"ushr",
+		"utof",
+		"xor",
+		"dcl_resource",
+		"dcl_constantbuffer",
+		"dcl_sampler",
+		"dcl_index_range",
+		"dcl_gs_output_primitive_topology",
+		"dcl_gs_input_primitive",
+		"dcl_max_output_vertex_count",
+		"dcl_input",
+		"dcl_input_sgv",
+		"dcl_input_siv",
+		"dcl_input_ps",
+		"dcl_input_ps_sgv",
+		"dcl_input_ps_siv",
+		"dcl_output",
+		"dcl_output_sgv",
+		"dcl_output_siv",
+		"dcl_temps",
+		"dcl_indexable_temp",
+		"dcl_global_flags",
+
+		NULL,
+		"lod",
+		"gather4",
+		"sample_pos",
+		"sample_info",
+
+		NULL,
+		"hs_decls",
+		"hs_control_point_phase",
+		"hs_fork_phase",
+		"hs_join_phase",
+		"emit_stream",
+		"cut_stream",
+		"emitthencut_stream",
+		"interface_call",
+		"bufinfo",
+		"deriv_rtx_coarse",
+		"deriv_rtx_fine",
+		"deriv_rty_coarse",
+		"deriv_rty_fine",
+		"gather4_c",
+		"gather4_po",
+		"gather4_po_c",
+		"rcp",
+		"f32tof16",
+		"f16tof32",
+		"uaddc",
+		"usubb",
+		"countbits",
+		"firstbit_hi",
+		"firstbit_lo",
+		"firstbit_shi",
+		"ubfe",
+		"ibfe",
+		"bfi",
+		"bfrev",
+		"swapc",
+		"dcl_stream",
+		"dcl_function_body",
+		"dcl_function_table",
+		"dcl_interface",
+		"dcl_input_control_point_count",
+		"dcl_output_control_point_count",
+		"dcl_tess_domain",
+		"dcl_tess_partitioning",
+		"dcl_tess_output_primitive",
+		"dcl_hs_max_tessfactor",
+		"dcl_hs_fork_phase_instance_count",
+		"dcl_hs_join_phase_instance_count",
+		"dcl_thread_group",
+		"dcl_unordered_access_view_typed",
+		"dcl_unordered_access_view_raw",
+		"dcl_unordered_access_view_structured",
+		"dcl_thread_group_shared_memory_raw",
+		"dcl_thread_group_shared_memory_structured",
+		"dcl_resource_raw",
+		"dcl_resource_structured",
+		"ld_uav_typed",
+		"store_uav_typed",
+		"ld_raw",
+		"store_raw",
+		"ld_structured",
+		"store_structured",
+		"atomic_and",
+		"atomic_or",
+		"atomic_xor",
+		"atomic_cmp_store",
+		"atomic_iadd",
+		"atomic_imax",
+		"atomic_imin",
+		"atomic_umax",
+		"atomic_umin",
+		"imm_atomic_alloc",
+		"imm_atomic_consume",
+		"imm_atomic_iadd",
+		"imm_atomic_and",
+		"imm_atomic_or",
+		"imm_atomic_xor",
+		"imm_atomic_exch",
+		"imm_atomic_cmp_exch",
+		"imm_atomic_imax",
+		"imm_atomic_imin",
+		"imm_atomic_umax",
+		"imm_atomic_umin",
+		"sync",
+		"dadd",
+		"dmax",
+		"dmin",
+		"dmul",
+		"deq",
+		"dge",
+		"dlt",
+		"dne",
+		"dmov",
+		"dmovc",
+		"dtof",
+		"ftod",
+		"eval_snapped",
+		"eval_sample_index",
+		"eval_centroid",
+		"dcl_gs_instance_count",
+		"abort",
+		"debug_break",
+
+		NULL,
+		"ddiv",
+		"dfma",
+		"drcp",
+		"msad",
+		"dtoi",
+		"dtou",
+		"itod",
+		"utod",
+	};
+	BX_STATIC_ASSERT(BX_COUNTOF(s_dxbcOpcode) == DxbcOpcode::Count);
+
+	// Looks up the mnemonic of _opcode in s_dxbcOpcode. The table holds NULL
+	// for the InstrD3D10 / InstrD3D10_1 / InstrD3D11 marker values, so the
+	// result can be NULL for those.
+	const char* getName(DxbcOpcode::Enum _opcode)
+	{
+		BX_CHECK(_opcode < DxbcOpcode::Count, "Unknown opcode id %d.", _opcode);
+
+		const char* const mnemonic = s_dxbcOpcode[_opcode];
+		return mnemonic;
+	}
+	// Display names for resource dimensions; one entry per DxbcResourceDim value, each labelled with its enumerator.
+	static const char* s_dxbcSrvType[] =
+	{
+		"",                 // Unknown
+		"Buffer",           // Buffer
+		"Texture1D",        // Texture1D
+		"Texture2D",        // Texture2D
+		"Texture2DMS",      // Texture2DMS
+		"Texture3D",        // Texture3D
+		"TextureCube",      // TextureCube
+		"Texture1DArray",   // Texture1DArray
+		"Texture2DArray",   // Texture2DArray
+		"Texture2DMSArray", // Texture2DMSArray
+		"TextureCubearray", // TextureCubearray
+		"RawBuffer",        // RawBuffer
+		"StructuredBuffer", // StructuredBuffer
+	};
+	BX_STATIC_ASSERT(BX_COUNTOF(s_dxbcSrvType) == DxbcResourceDim::Count);
+	// Display names for interpolation modes; one entry per DxbcInterpolation value (first entry is the empty string).
+	static const char* s_dxbcInterpolationName[] =
+	{
+		"",
+		"constant",
+		"linear",
+		"linear centroid",
+		"linear noperspective",
+		"linear noperspective centroid",
+		"linear sample",
+		"linear noperspective sample",
+	};
+	BX_STATIC_ASSERT(BX_COUNTOF(s_dxbcInterpolationName) == DxbcInterpolation::Count);
+
+	// Short register names used when printing operands; one entry per
+	// DxbcOperandType value, each labelled with its enumerator.
+	// Names follow:
+	// mesa/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/shortfiles.txt
+	static const char* s_dxbcOperandType[] =
+	{
+		"r",                         // Temp
+		"v",                         // Input
+		"o",                         // Output
+		"x",                         // TempArray
+		"l",                         // Imm32
+		"d",                         // Imm64
+		"s",                         // Sampler
+		"t",                         // Resource
+		"cb",                        // ConstantBuffer
+		"icb",                       // ImmConstantBuffer
+		"label",                     // Label
+		"vPrim",                     // PrimitiveID
+		"oDepth",                    // OutputDepth
+		"null",                      // Null
+		"rasterizer",                // Rasterizer
+		"oMask",                     // CoverageMask
+		"stream",                    // Stream
+		"function_body",             // FunctionBody
+		"function_table",            // FunctionTable
+		"interface",                 // Interface
+		"function_input",            // FunctionInput
+		"function_output",           // FunctionOutput
+		"vOutputControlPointID",     // OutputControlPointId
+		"vForkInstanceID",           // InputForkInstanceId
+		"vJoinInstanceID",           // InputJoinInstanceId
+		"vicp",                      // InputControlPoint
+		"vocp",                      // OutputControlPoint
+		"vpc",                       // InputPatchConstant
+		"vDomain",                   // InputDomainPoint
+		"this",                      // ThisPointer
+		"u",                         // UnorderedAccessView
+		"g",                         // ThreadGroupSharedMemory
+		"vThreadID",                 // InputThreadId
+		"vThreadGroupID",            // InputThreadGroupId
+		"vThreadIDInGroup",          // InputThreadIdInGroup
+		"vCoverage",                 // InputCoverageMask
+		"vThreadIDInGroupFlattened", // InputThreadIdInGroupFlattened
+		"vGSInstanceID",             // InputGsInstanceId
+		"oDepthGE",                  // OutputDepthGreaterEqual
+		"oDepthLE",                  // OutputDepthLessEqual
+		"vCycleCounter",             // CycleCounter
+	};
+	BX_STATIC_ASSERT(BX_COUNTOF(s_dxbcOperandType) == DxbcOperandType::Count);
+
+#define DXBC_MAX_NAME_STRING 512
+
+	// Reads a NUL-terminated string located at absolute position _offset of
+	// _reader into _out, then restores the reader to where it was.
+	//
+	// _out must have room for _max chars. At most _max-1 chars are read and the
+	// result is always NUL-terminated. Returns the accumulated results of the
+	// bx::read calls (presumably the number of bytes consumed, terminator
+	// included -- confirm against bx::read). A zero _max reads nothing, leaves
+	// _out untouched and returns 0.
+	int32_t readString(bx::ReaderSeekerI* _reader, int64_t _offset, char* _out, uint32_t _max = DXBC_MAX_NAME_STRING)
+	{
+		// With _max == 0 the loop bound _max-1 would wrap around to UINT32_MAX
+		// and there would be no room for the terminator either.
+		if (0 == _max)
+		{
+			return 0;
+		}
+
+		const int64_t oldOffset = bx::seek(_reader);
+		bx::seek(_reader, _offset, bx::Whence::Begin);
+
+		int32_t size = 0;
+
+		for (uint32_t ii = 0; ii < _max-1; ++ii)
+		{
+			// Pre-set to NUL so that a read which delivers no data (e.g. end of
+			// stream) terminates the string instead of storing an
+			// indeterminate char.
+			char ch = '\0';
+			size += bx::read(_reader, ch);
+			*_out++ = ch;
+
+			if ('\0' == ch)
+			{
+				break;
+			}
+		}
+		*_out = '\0';
+
+		bx::seek(_reader, oldOffset, bx::Whence::Begin);
+
+		return size;
+	}
+
+	// Mixing function of the first 16 steps of dxbcHashBlock:
+	// xor(_d, and(_b, xor(_c, _d) ) ).
+	inline uint32_t dxbcMixF(uint32_t _b, uint32_t _c, uint32_t _d)
+	{
+		return bx::uint32_xor(_d, bx::uint32_and(_b, bx::uint32_xor(_c, _d) ) );
+	}
+
+	// Mixing function of steps 17-32 of dxbcHashBlock; it is dxbcMixF with
+	// the arguments rotated to (_d, _b, _c).
+	inline uint32_t dxbcMixG(uint32_t _b, uint32_t _c, uint32_t _d)
+	{
+		const uint32_t result = dxbcMixF(_d, _b, _c);
+
+		return result;
+	}
+
+	// Mixing function of steps 33-48 of dxbcHashBlock:
+	// xor(_d, xor(_b, _c) ).
+	inline uint32_t dxbcMixH(uint32_t _b, uint32_t _c, uint32_t _d)
+	{
+		return bx::uint32_xor(_d, bx::uint32_xor(_b, _c) );
+	}
+
+	// Mixing function of steps 49-64 of dxbcHashBlock:
+	// xor(_c, orc(_b, _d) ).
+	inline uint32_t dxbcMixI(uint32_t _b, uint32_t _c, uint32_t _d)
+	{
+		return bx::uint32_xor(_c, bx::uint32_orc(_b, _d) );
+	}
+	/* Mixes one 64-byte block (16 uint32_t words at `data`) into the 4-word
+	 * running state `hash`, which is updated in place.
+	 *
+	 * The step constants, message word order and rotation amounts are those
+	 * of the MD5 block transform (RFC 1321). Steps written with
+	 * bx::uint32_ror(x, N) correspond to a left rotation by 32-N, assuming
+	 * bx::uint32_rol/uint32_ror are plain 32-bit rotates.
+	 */
+	void dxbcHashBlock(const uint32_t* data, uint32_t* hash)
+	{
+		const uint32_t d0  = data[ 0];
+		const uint32_t d1  = data[ 1];
+		const uint32_t d2  = data[ 2];
+		const uint32_t d3  = data[ 3];
+		const uint32_t d4  = data[ 4];
+		const uint32_t d5  = data[ 5];
+		const uint32_t d6  = data[ 6];
+		const uint32_t d7  = data[ 7];
+		const uint32_t d8  = data[ 8];
+		const uint32_t d9  = data[ 9];
+		const uint32_t d10 = data[10];
+		const uint32_t d11 = data[11];
+		const uint32_t d12 = data[12];
+		const uint32_t d13 = data[13];
+		const uint32_t d14 = data[14];
+		const uint32_t d15 = data[15];
+		// Working copy of the running state.
+		uint32_t aa = hash[0];
+		uint32_t bb = hash[1];
+		uint32_t cc = hash[2];
+		uint32_t dd = hash[3];
+		// Steps 1-16: dxbcMixF, message words in order 0..15.
+		aa = bb + bx::uint32_rol(aa + dxbcMixF(bb, cc, dd) + d0  + 0xd76aa478,  7);
+		dd = aa + bx::uint32_rol(dd + dxbcMixF(aa, bb, cc) + d1  + 0xe8c7b756, 12);
+		cc = dd + bx::uint32_ror(cc + dxbcMixF(dd, aa, bb) + d2  + 0x242070db, 15);
+		bb = cc + bx::uint32_ror(bb + dxbcMixF(cc, dd, aa) + d3  + 0xc1bdceee, 10);
+		aa = bb + bx::uint32_rol(aa + dxbcMixF(bb, cc, dd) + d4  + 0xf57c0faf,  7);
+		dd = aa + bx::uint32_rol(dd + dxbcMixF(aa, bb, cc) + d5  + 0x4787c62a, 12);
+		cc = dd + bx::uint32_ror(cc + dxbcMixF(dd, aa, bb) + d6  + 0xa8304613, 15);
+		bb = cc + bx::uint32_ror(bb + dxbcMixF(cc, dd, aa) + d7  + 0xfd469501, 10);
+		aa = bb + bx::uint32_rol(aa + dxbcMixF(bb, cc, dd) + d8  + 0x698098d8,  7);
+		dd = aa + bx::uint32_rol(dd + dxbcMixF(aa, bb, cc) + d9  + 0x8b44f7af, 12);
+		cc = dd + bx::uint32_ror(cc + dxbcMixF(dd, aa, bb) + d10 + 0xffff5bb1, 15);
+		bb = cc + bx::uint32_ror(bb + dxbcMixF(cc, dd, aa) + d11 + 0x895cd7be, 10);
+		aa = bb + bx::uint32_rol(aa + dxbcMixF(bb, cc, dd) + d12 + 0x6b901122,  7);
+		dd = aa + bx::uint32_rol(dd + dxbcMixF(aa, bb, cc) + d13 + 0xfd987193, 12);
+		cc = dd + bx::uint32_ror(cc + dxbcMixF(dd, aa, bb) + d14 + 0xa679438e, 15);
+		bb = cc + bx::uint32_ror(bb + dxbcMixF(cc, dd, aa) + d15 + 0x49b40821, 10);
+		// Steps 17-32: dxbcMixG, message words start at 1 and advance by 5 (mod 16).
+		aa = bb + bx::uint32_rol(aa + dxbcMixG(bb, cc, dd) + d1  + 0xf61e2562,  5);
+		dd = aa + bx::uint32_rol(dd + dxbcMixG(aa, bb, cc) + d6  + 0xc040b340,  9);
+		cc = dd + bx::uint32_rol(cc + dxbcMixG(dd, aa, bb) + d11 + 0x265e5a51, 14);
+		bb = cc + bx::uint32_ror(bb + dxbcMixG(cc, dd, aa) + d0  + 0xe9b6c7aa, 12);
+		aa = bb + bx::uint32_rol(aa + dxbcMixG(bb, cc, dd) + d5  + 0xd62f105d,  5);
+		dd = aa + bx::uint32_rol(dd + dxbcMixG(aa, bb, cc) + d10 + 0x02441453,  9);
+		cc = dd + bx::uint32_rol(cc + dxbcMixG(dd, aa, bb) + d15 + 0xd8a1e681, 14);
+		bb = cc + bx::uint32_ror(bb + dxbcMixG(cc, dd, aa) + d4  + 0xe7d3fbc8, 12);
+		aa = bb + bx::uint32_rol(aa + dxbcMixG(bb, cc, dd) + d9  + 0x21e1cde6,  5);
+		dd = aa + bx::uint32_rol(dd + dxbcMixG(aa, bb, cc) + d14 + 0xc33707d6,  9);
+		cc = dd + bx::uint32_rol(cc + dxbcMixG(dd, aa, bb) + d3  + 0xf4d50d87, 14);
+		bb = cc + bx::uint32_ror(bb + dxbcMixG(cc, dd, aa) + d8  + 0x455a14ed, 12);
+		aa = bb + bx::uint32_rol(aa + dxbcMixG(bb, cc, dd) + d13 + 0xa9e3e905,  5);
+		dd = aa + bx::uint32_rol(dd + dxbcMixG(aa, bb, cc) + d2  + 0xfcefa3f8,  9);
+		cc = dd + bx::uint32_rol(cc + dxbcMixG(dd, aa, bb) + d7  + 0x676f02d9, 14);
+		bb = cc + bx::uint32_ror(bb + dxbcMixG(cc, dd, aa) + d12 + 0x8d2a4c8a, 12);
+		// Steps 33-48: dxbcMixH, message words start at 5 and advance by 3 (mod 16).
+		aa = bb + bx::uint32_rol(aa + dxbcMixH(bb, cc, dd) + d5  + 0xfffa3942,  4);
+		dd = aa + bx::uint32_rol(dd + dxbcMixH(aa, bb, cc) + d8  + 0x8771f681, 11);
+		cc = dd + bx::uint32_rol(cc + dxbcMixH(dd, aa, bb) + d11 + 0x6d9d6122, 16);
+		bb = cc + bx::uint32_ror(bb + dxbcMixH(cc, dd, aa) + d14 + 0xfde5380c,  9);
+		aa = bb + bx::uint32_rol(aa + dxbcMixH(bb, cc, dd) + d1  + 0xa4beea44,  4);
+		dd = aa + bx::uint32_rol(dd + dxbcMixH(aa, bb, cc) + d4  + 0x4bdecfa9, 11);
+		cc = dd + bx::uint32_rol(cc + dxbcMixH(dd, aa, bb) + d7  + 0xf6bb4b60, 16);
+		bb = cc + bx::uint32_ror(bb + dxbcMixH(cc, dd, aa) + d10 + 0xbebfbc70,  9);
+		aa = bb + bx::uint32_rol(aa + dxbcMixH(bb, cc, dd) + d13 + 0x289b7ec6,  4);
+		dd = aa + bx::uint32_rol(dd + dxbcMixH(aa, bb, cc) + d0  + 0xeaa127fa, 11);
+		cc = dd + bx::uint32_rol(cc + dxbcMixH(dd, aa, bb) + d3  + 0xd4ef3085, 16);
+		bb = cc + bx::uint32_ror(bb + dxbcMixH(cc, dd, aa) + d6  + 0x04881d05,  9);
+		aa = bb + bx::uint32_rol(aa + dxbcMixH(bb, cc, dd) + d9  + 0xd9d4d039,  4);
+		dd = aa + bx::uint32_rol(dd + dxbcMixH(aa, bb, cc) + d12 + 0xe6db99e5, 11);
+		cc = dd + bx::uint32_rol(cc + dxbcMixH(dd, aa, bb) + d15 + 0x1fa27cf8, 16);
+		bb = cc + bx::uint32_ror(bb + dxbcMixH(cc, dd, aa) + d2  + 0xc4ac5665,  9);
+		// Steps 49-64: dxbcMixI, message words start at 0 and advance by 7 (mod 16).
+		aa = bb + bx::uint32_rol(aa + dxbcMixI(bb, cc, dd) + d0  + 0xf4292244,  6);
+		dd = aa + bx::uint32_rol(dd + dxbcMixI(aa, bb, cc) + d7  + 0x432aff97, 10);
+		cc = dd + bx::uint32_rol(cc + dxbcMixI(dd, aa, bb) + d14 + 0xab9423a7, 15);
+		bb = cc + bx::uint32_ror(bb + dxbcMixI(cc, dd, aa) + d5  + 0xfc93a039, 11);
+		aa = bb + bx::uint32_rol(aa + dxbcMixI(bb, cc, dd) + d12 + 0x655b59c3,  6);
+		dd = aa + bx::uint32_rol(dd + dxbcMixI(aa, bb, cc) + d3  + 0x8f0ccc92, 10);
+		cc = dd + bx::uint32_rol(cc + dxbcMixI(dd, aa, bb) + d10 + 0xffeff47d, 15);
+		bb = cc + bx::uint32_ror(bb + dxbcMixI(cc, dd, aa) + d1  + 0x85845dd1, 11);
+		aa = bb + bx::uint32_rol(aa + dxbcMixI(bb, cc, dd) + d8  + 0x6fa87e4f,  6);
+		dd = aa + bx::uint32_rol(dd + dxbcMixI(aa, bb, cc) + d15 + 0xfe2ce6e0, 10);
+		cc = dd + bx::uint32_rol(cc + dxbcMixI(dd, aa, bb) + d6  + 0xa3014314, 15);
+		bb = cc + bx::uint32_ror(bb + dxbcMixI(cc, dd, aa) + d13 + 0x4e0811a1, 11);
+		aa = bb + bx::uint32_rol(aa + dxbcMixI(bb, cc, dd) + d4  + 0xf7537e82,  6);
+		dd = aa + bx::uint32_rol(dd + dxbcMixI(aa, bb, cc) + d11 + 0xbd3af235, 10);
+		cc = dd + bx::uint32_rol(cc + dxbcMixI(dd, aa, bb) + d2  + 0x2ad7d2bb, 15);
+		bb = cc + bx::uint32_ror(bb + dxbcMixI(cc, dd, aa) + d9  + 0xeb86d391, 11);
+		// Fold the working registers back into the running state.
+		hash[0] += aa;
+		hash[1] += bb;
+		hash[2] += cc;
+		hash[3] += dd;
+	}
+
+	// Computes the 16-byte DXBC hash of _size bytes at _data and stores it in
+	// _digest.
+	//
+	// The hash is a slightly modified MD5:
+	// https://tools.ietf.org/html/rfc1321
+	// http://www.efgh.com/software/md5.txt
+	//
+	// Whole 64-byte blocks go through dxbcHashBlock unchanged. The difference
+	// is in the final block, whose first word is _size*8 and whose last word
+	// is _size*2+1:
+	//  - fewer than 56 bytes left: they are placed after the first word,
+	//    followed by a 0x80 word;
+	//  - 56 bytes or more left: they are hashed as a block of their own with
+	//    a 0x80 word appended, and the final block carries only the two
+	//    length words.
+	//
+	// Assumes that the data pointer and size are both 4-byte aligned and that
+	// the host is little endian.
+	void dxbcHash(const void* _data, uint32_t _size, void* _digest)
+	{
+		uint32_t state[4] =
+		{
+			0x67452301,
+			0xefcdab89,
+			0x98badcfe,
+			0x10325476,
+		};
+
+		// Consume all complete 64-byte (16-word) blocks.
+		const uint32_t* block = (const uint32_t*)_data;
+		const uint32_t* const blocksEnd = block + (_size/64)*16;
+		while (block != blocksEnd)
+		{
+			dxbcHashBlock(block, state);
+			block += 16;
+		}
+
+		const uint32_t tailBytes = _size & 0x3f;
+		const uint32_t tailWords = tailBytes/4;
+
+		uint32_t tail[16];
+		memset(tail, 0, sizeof(tail) );
+
+		if (tailBytes < 56)
+		{
+			// Tail fits next to the length words: keep word 0 free for them.
+			memcpy(&tail[1], block, tailBytes);
+			tail[1 + tailWords] = 0x80;
+		}
+		else
+		{
+			// Tail needs a block of its own; afterwards clear words 1..14 so
+			// that the final block only carries the length words.
+			memcpy(&tail[0], block, tailBytes);
+			tail[tailWords] = 0x80;
+			dxbcHashBlock(tail, state);
+
+			memset(&tail[1], 0, 56);
+		}
+
+		tail[ 0] = _size * 8;
+		tail[15] = _size * 2 + 1;
+		dxbcHashBlock(tail, state);
+
+		memcpy(_digest, state, 16);
+	}
+	/* Decodes one DxbcSubOperand from _reader: a 32-bit token followed by a
+	 * payload selected by the token's addressing mode.
+	 *
+	 * read(DxbcOperand) calls this for its Reg and RegImm32 addressing modes.
+	 * Returns the accumulated results of the bx::read calls (presumably the
+	 * number of bytes consumed -- confirm against bx::read).
+	 *
+	 * NOTE(review): a nested sub-operand (Reg / RegImm32 / RegImm64) is parsed
+	 * into a local and dropped, so it only advances the reader and cannot be
+	 * reproduced by write(). For RegImm64 both 32-bit reads target regIndex,
+	 * so the first value is overwritten by the second.
+	 */
+	int32_t read(bx::ReaderI* _reader, DxbcSubOperand& _subOperand)
+	{
+		uint32_t token;
+		int32_t size = 0;
+
+		// 0       1       2       3
+		// 76543210765432107654321076543210
+		// e222111000nnttttttttssssssssmmoo
+		// ^^  ^  ^  ^ ^       ^       ^ ^-- number of operands
+		// ||  |  |  | |       |       +---- operand mode
+		// ||  |  |  | |       +------------ operand mode bits
+		// ||  |  |  | +-------------------- type
+		// ||  |  |  +---------------------- number of addressing modes
+		// ||  |  +------------------------- addressing mode 0
+		// ||  +---------------------------- addressing mode 1
+		// |+------------------------------- addressing mode 2
+		// +-------------------------------- extended
+
+		size += bx::read(_reader, token);
+		_subOperand.type         = DxbcOperandType::Enum( (token & UINT32_C(0x000ff000) ) >> 12);
+		_subOperand.numAddrModes =               uint8_t( (token & UINT32_C(0x00300000) ) >> 20);
+		_subOperand.addrMode     =               uint8_t( (token & UINT32_C(0x01c00000) ) >> 22); // only addressing mode 0 is kept
+		_subOperand.mode         = DxbcOperandMode::Enum( (token & UINT32_C(0x0000000c) ) >>  2);
+		_subOperand.modeBits     =               uint8_t( (token & UINT32_C(0x00000ff0) ) >>  4) & "\x0f\xff\x03\x00"[_subOperand.mode]; // mask 0x0f/0xff/0x03/0x00 picked by mode 0/1/2/3
+		_subOperand.num          =               uint8_t( (token & UINT32_C(0x00000003) )      );
+
+		switch (_subOperand.addrMode)
+		{
+		case DxbcOperandAddrMode::Imm32:
+			size += bx::read(_reader, _subOperand.regIndex);
+			break;
+
+		case DxbcOperandAddrMode::Reg:
+			{
+				DxbcSubOperand subOperand;
+				size += read(_reader, subOperand); // parsed to advance the reader; the value is dropped
+			}
+			break;
+
+		case DxbcOperandAddrMode::RegImm32:
+			{
+				size += bx::read(_reader, _subOperand.regIndex);
+
+				DxbcSubOperand subOperand;
+				size += read(_reader, subOperand); // parsed to advance the reader; the value is dropped
+			}
+			break;
+
+		case DxbcOperandAddrMode::RegImm64:
+			{
+				size += bx::read(_reader, _subOperand.regIndex);
+				size += bx::read(_reader, _subOperand.regIndex); // second 32-bit read overwrites the first
+
+				DxbcSubOperand subOperand;
+				size += read(_reader, subOperand); // parsed to advance the reader; the value is dropped
+			}
+			break;
+
+		default:
+			BX_CHECK(false, "sub operand addressing mode %d", _subOperand.addrMode);
+			break;
+		}
+
+		return size;
+	}
+	/* Encodes _subOperand to _writer: one 32-bit token followed by the payload
+	 * for its addressing mode, in the same order read(DxbcSubOperand) consumes
+	 * them. Returns the accumulated results of the bx::write calls (presumably
+	 * the number of bytes produced -- confirm against bx::write).
+	 *
+	 * NOTE(review): for Reg / RegImm32 / RegImm64 the nested sub-operand that
+	 * gets written is a default-initialized local, not data taken from
+	 * _subOperand. If DxbcSubOperand has no initializing constructor its
+	 * fields are indeterminate here, including the addrMode that drives the
+	 * recursion -- verify and fix together with the struct definition.
+	 */
+	int32_t write(bx::WriterI* _writer, const DxbcSubOperand& _subOperand)
+	{
+		int32_t size = 0;
+
+		uint32_t token = 0;
+		token |= (_subOperand.type         << 12) & UINT32_C(0x000ff000);
+		token |= (_subOperand.numAddrModes << 20) & UINT32_C(0x00300000);
+		token |= (_subOperand.addrMode     << 22) & UINT32_C(0x01c00000);
+		token |= (_subOperand.mode         <<  2) & UINT32_C(0x0000000c);
+		token |= (_subOperand.modeBits     <<  4) & UINT32_C(0x00000ff0);
+		token |=  _subOperand.num                 & UINT32_C(0x00000003);
+		size += bx::write(_writer, token);
+
+		switch (_subOperand.addrMode)
+		{
+		case DxbcOperandAddrMode::Imm32:
+			size += bx::write(_writer, _subOperand.regIndex);
+			break;
+
+		case DxbcOperandAddrMode::Reg:
+			{
+				DxbcSubOperand subOperand; // NOTE(review): never assigned before being written
+				size += write(_writer, subOperand);
+			}
+			break;
+
+		case DxbcOperandAddrMode::RegImm32:
+			{
+				size += bx::write(_writer, _subOperand.regIndex);
+
+				DxbcSubOperand subOperand; // NOTE(review): never assigned before being written
+				size += write(_writer, subOperand);
+			}
+			break;
+
+		case DxbcOperandAddrMode::RegImm64:
+			{
+				size += bx::write(_writer, _subOperand.regIndex);
+				size += bx::write(_writer, _subOperand.regIndex); // same 32-bit value emitted twice
+
+				DxbcSubOperand subOperand; // NOTE(review): never assigned before being written
+				size += write(_writer, subOperand);
+			}
+			break;
+
+		default:
+			BX_CHECK(false, "sub operand addressing mode %d", _subOperand.addrMode);
+			break;
+		}
+
+		return size;
+	}
+	/* Decodes one DxbcOperand from _reader. Stream order: the 32-bit operand
+	 * token, an optional extension word (when the `extended` bit is set),
+	 * immediate values for Imm32 / Imm64 operands, then one payload per
+	 * addressing mode (numAddrModes of them).
+	 *
+	 * Returns the accumulated results of the bx::read calls (presumably the
+	 * number of bytes consumed -- confirm against bx::read).
+	 */
+	int32_t read(bx::ReaderI* _reader, DxbcOperand& _operand)
+	{
+		int32_t size = 0;
+
+		uint32_t token;
+		size += bx::read(_reader, token);
+
+		// 0       1       2       3
+		// 76543210765432107654321076543210
+		// e222111000nnttttttttssssssssmmoo
+		// ^^  ^  ^  ^ ^       ^       ^ ^-- number of operands
+		// ||  |  |  | |       |       +---- operand mode
+		// ||  |  |  | |       +------------ operand mode bits
+		// ||  |  |  | +-------------------- type
+		// ||  |  |  +---------------------- number of addressing modes
+		// ||  |  +------------------------- addressing mode 0
+		// ||  +---------------------------- addressing mode 1
+		// |+------------------------------- addressing mode 2
+		// +-------------------------------- extended
+
+		_operand.extended     =                   0 != (token & UINT32_C(0x80000000) );
+		_operand.numAddrModes =               uint8_t( (token & UINT32_C(0x00300000) ) >> 20);
+		_operand.addrMode[0]  =               uint8_t( (token & UINT32_C(0x01c00000) ) >> 22);
+		_operand.addrMode[1]  =               uint8_t( (token & UINT32_C(0x0e000000) ) >> 25);
+		_operand.addrMode[2]  =               uint8_t( (token & UINT32_C(0x70000000) ) >> 28);
+		_operand.type         = DxbcOperandType::Enum( (token & UINT32_C(0x000ff000) ) >> 12);
+		_operand.mode         = DxbcOperandMode::Enum( (token & UINT32_C(0x0000000c) ) >>  2);
+		_operand.modeBits     =               uint8_t( (token & UINT32_C(0x00000ff0) ) >>  4) & "\x0f\xff\x03\x00"[_operand.mode]; // mask 0x0f/0xff/0x03/0x00 picked by mode 0/1/2/3
+		_operand.num          =               uint8_t( (token & UINT32_C(0x00000003) )      );
+
+		if (_operand.extended)
+		{
+			size += bx::read(_reader, _operand.extBits);
+		}
+
+		switch (_operand.type)
+		{
+		case DxbcOperandType::Imm32:
+			_operand.num = 2 == _operand.num ? 4 : _operand.num; // encoded count 2 stands for 4 immediates
+			for (uint32_t ii = 0; ii < _operand.num; ++ii)
+			{
+				size += bx::read(_reader, _operand.un.imm32[ii]);
+			}
+			break;
+
+		case DxbcOperandType::Imm64:
+			_operand.num = 2 == _operand.num ? 4 : _operand.num; // encoded count 2 stands for 4 immediates
+			for (uint32_t ii = 0; ii < _operand.num; ++ii)
+			{
+				size += bx::read(_reader, _operand.un.imm64[ii]);
+			}
+			break;
+
+		default:
+			break;
+		}
+
+		for (uint32_t ii = 0; ii < _operand.numAddrModes; ++ii)
+		{
+			switch (_operand.addrMode[ii])
+			{
+			case DxbcOperandAddrMode::Imm32:
+				size += bx::read(_reader, _operand.regIndex[ii]);
+				break;
+
+			case DxbcOperandAddrMode::Reg:
+				size += read(_reader, _operand.subOperand[ii]);
+				break;
+
+			case DxbcOperandAddrMode::RegImm32:
+				size += bx::read(_reader, _operand.regIndex[ii]);
+				size += read(_reader, _operand.subOperand[ii]);
+				break;
+
+			default: // any other addressing mode is not decoded here; nothing is consumed for it
+				BX_CHECK(false, "operand %d addressing mode %d", ii, _operand.addrMode[ii]);
+				break;
+			}
+		}
+
+		return size;
+	}
+	/* Encodes _operand to _writer in the order read(DxbcOperand) consumes it:
+	 * operand token, optional extension word, immediates for Imm32 / Imm64,
+	 * then one payload per addressing mode.
+	 *
+	 * Returns the accumulated results of the bx::write calls (presumably the
+	 * number of bytes produced -- confirm against bx::write).
+	 */
+	int32_t write(bx::WriterI* _writer, const DxbcOperand& _operand)
+	{
+		int32_t size = 0;
+
+		uint32_t token = 0;
+		token |=  _operand.extended            ? UINT32_C(0x80000000) : 0;
+		token |= (_operand.numAddrModes << 20) & UINT32_C(0x00300000);
+		token |= (_operand.addrMode[0]  << 22) & UINT32_C(0x01c00000);
+		token |= (_operand.addrMode[1]  << 25) & UINT32_C(0x0e000000);
+		token |= (_operand.addrMode[2]  << 28) & UINT32_C(0x70000000);
+		token |= (_operand.type         << 12) & UINT32_C(0x000ff000);
+		token |= (_operand.mode         <<  2) & UINT32_C(0x0000000c);
+		// Undo the 2 -> 4 count expansion done by read(), and re-apply the per-mode mask to the mode bits.
+		token |= (4 == _operand.num ? 2 : _operand.num) & UINT32_C(0x00000003);
+		token |= ( (_operand.modeBits & "\x0f\xff\x03\x00"[_operand.mode]) << 4) & UINT32_C(0x00000ff0);
+
+		size += bx::write(_writer, token);
+
+		if (_operand.extended)
+		{
+			size += bx::write(_writer, _operand.extBits);
+		}
+
+		switch (_operand.type)
+		{
+		case DxbcOperandType::Imm32:
+			for (uint32_t ii = 0; ii < _operand.num; ++ii) // num is the expanded count here
+			{
+				size += bx::write(_writer, _operand.un.imm32[ii]);
+			}
+			break;
+
+		case DxbcOperandType::Imm64:
+			for (uint32_t ii = 0; ii < _operand.num; ++ii) // num is the expanded count here
+			{
+				size += bx::write(_writer, _operand.un.imm64[ii]);
+			}
+			break;
+
+		default:
+			break;
+		}
+
+		for (uint32_t ii = 0; ii < _operand.numAddrModes; ++ii)
+		{
+			switch (_operand.addrMode[ii])
+			{
+			case DxbcOperandAddrMode::Imm32:
+				size += bx::write(_writer, _operand.regIndex[ii]);
+				break;
+
+			case DxbcOperandAddrMode::Reg:
+				size += write(_writer, _operand.subOperand[ii]);
+				break;
+
+			case DxbcOperandAddrMode::RegImm32:
+				size += bx::write(_writer, _operand.regIndex[ii]);
+				size += write(_writer, _operand.subOperand[ii]);
+				break;
+
+			default: // any other addressing mode is not encoded here; nothing is emitted for it
+				BX_CHECK(false, "operand %d addressing mode %d", ii, _operand.addrMode[ii]);
+				break;
+			}
+		}
+
+		return size;
+	}
+
+	int32_t read(bx::ReaderI* _reader, DxbcInstruction& _instruction)
+	{
+		uint32_t size = 0;
+
+		uint32_t token;
+		size += bx::read(_reader, token);
+
+		// 0       1       2       3
+		// 76543210765432107654321076543210
+		// elllllll.............ooooooooooo
+		// ^^                   ^----------- opcode
+		// |+------------------------------- length
+		// +-------------------------------- extended
+
+		_instruction.opcode = DxbcOpcode::Enum( (token & UINT32_C(0x000007ff) )      );
+		_instruction.length =          uint8_t( (token & UINT32_C(0x7f000000) ) >> 24);
+		bool extended       =              0 != (token & UINT32_C(0x80000000) );
+
+		_instruction.srv     = DxbcResourceDim::Unknown;
+		_instruction.samples = 0;
+
+		_instruction.shadow = false;
+		_instruction.mono   = false;
+
+		_instruction.allowRefactoring = false;
+		_instruction.fp64             = false;
+		_instruction.earlyDepth       = false;
+		_instruction.enableBuffers    = false;
+		_instruction.skipOptimization = false;
+		_instruction.enableMinPrecision     = false;
+		_instruction.enableDoubleExtensions = false;
+		_instruction.enableShaderExtensions = false;
+
+		_instruction.threadsInGroup = false;
+		_instruction.sharedMemory   = false;
+		_instruction.uavGroup       = false;
+		_instruction.uavGlobal      = false;
+
+		_instruction.saturate = false;
+		_instruction.testNZ   = false;
+		_instruction.retType  = DxbcResourceReturnType::Unused;
+
+		switch (_instruction.opcode)
+		{
+			case DxbcOpcode::CUSTOMDATA:
+				{
+//					uint32_t dataClass;
+					size += bx::read(_reader, _instruction.length);
+					for (uint32_t ii = 0, num = (_instruction.length-2)/4; ii < num; ++ii)
+					{
+						char temp[16];
+						size += bx::read(_reader, temp, 16);
+					}
+
+				}
+				return size;
+
+			case DxbcOpcode::DCL_CONSTANT_BUFFER:
+				// 0       1       2       3
+				// 76543210765432107654321076543210
+				// ........            a...........
+				//                     ^------------ Allow refactoring
+
+				_instruction.allowRefactoring = 0 != (token & UINT32_C(0x00000800) );
+				break;
+
+			case DxbcOpcode::DCL_GLOBAL_FLAGS:
+				// 0       1       2       3
+				// 76543210765432107654321076543210
+				// ........     sxmoudfa...........
+				//              ^^^^^^^^------------ Allow refactoring
+				//              ||||||+------------- FP64
+				//              |||||+-------------- Force early depth/stencil
+				//              ||||+--------------- Enable raw and structured buffers
+				//              |||+---------------- Skip optimizations
+				//              ||+----------------- Enable minimum precision
+				//              |+------------------ Enable double extension
+				//              +------------------- Enable shader extension
+
+				_instruction.allowRefactoring       = 0 != (token & UINT32_C(0x00000800) );
+				_instruction.fp64                   = 0 != (token & UINT32_C(0x00001000) );
+				_instruction.earlyDepth             = 0 != (token & UINT32_C(0x00002000) );
+				_instruction.enableBuffers          = 0 != (token & UINT32_C(0x00004000) );
+				_instruction.skipOptimization       = 0 != (token & UINT32_C(0x00008000) );
+				_instruction.enableMinPrecision     = 0 != (token & UINT32_C(0x00010000) );
+				_instruction.enableDoubleExtensions = 0 != (token & UINT32_C(0x00020000) );
+				_instruction.enableShaderExtensions = 0 != (token & UINT32_C(0x00040000) );
+				break;
+
+			case DxbcOpcode::DCL_INPUT_PS:
+				// 0       1       2       3
+				// 76543210765432107654321076543210
+				// ........        iiiii...........
+				//                 ^---------------- Interpolation
+
+				_instruction.interpolation = DxbcInterpolation::Enum( (token & UINT32_C(0x0000f800) ) >> 11);
+				break;
+
+			case DxbcOpcode::DCL_RESOURCE:
+				// 0       1       2       3
+				// 76543210765432107654321076543210
+				// ........ sssssssrrrrr...........
+				//          ^      ^---------------- SRV
+				//          +----------------------- MSAA samples
+
+				_instruction.srv     = DxbcResourceDim::Enum( (token & UINT32_C(0x0000f800) ) >> 11);
+				_instruction.samples =               uint8_t( (token & UINT32_C(0x007f0000) ) >> 16);
+				break;
+
+			case DxbcOpcode::DCL_SAMPLER:
+				// 0       1       2       3
+				// 76543210765432107654321076543210
+				// ........           ms...........
+				//                    ^^------------ Shadow sampler
+				//                    +------------- Mono
+
+				_instruction.shadow = 0 != (token & UINT32_C(0x00000800) );
+				_instruction.mono   = 0 != (token & UINT32_C(0x00001000) );
+				break;
+
+			case DxbcOpcode::SYNC:
+				// 0       1       2       3
+				// 76543210765432107654321076543210
+				// ........         gust...........
+				//                  ^^^^------------ Threads in group
+				//                  ||+------------- Shared memory
+				//                  |+-------------- UAV group
+				//                  +--------------- UAV global
+
+				_instruction.threadsInGroup = 0 != (token & UINT32_C(0x00000800) );
+				_instruction.sharedMemory   = 0 != (token & UINT32_C(0x00001000) );
+				_instruction.uavGroup       = 0 != (token & UINT32_C(0x00002000) );
+				_instruction.uavGlobal      = 0 != (token & UINT32_C(0x00004000) );
+				break;
+
+			default:
+				// 0       1       2       3
+				// 76543210765432107654321076543210
+				// ........ ppppn    stt...........
+				//          ^   ^    ^^------------- Resource info return type
+				//          |   |    +-------------- Saturate
+				//          |   +------------------- Test not zero
+				//          +----------------------- Precise mask
+
+				_instruction.retType  = DxbcResourceReturnType::Enum( (token & UINT32_C(0x00001800) ) >> 11);
+				_instruction.saturate =                          0 != (token & UINT32_C(0x00002000) );
+				_instruction.testNZ   =                          0 != (token & UINT32_C(0x00040000) );
+//				_instruction.precise  =              uint8_t( (token & UINT32_C(0x00780000) ) >> 19);
+				break;
+		}
+
+		_instruction.extended[0] = DxbcInstruction::ExtendedType::Count;
+		for (uint32_t ii = 0; extended; ++ii)
+		{
+			// 0       1       2       3
+			// 76543210765432107654321076543210
+			// e..........................ttttt
+			// ^                          ^
+			// |                          +----- type
+			// +-------------------------------- extended
+
+			uint32_t extBits;
+			size += bx::read(_reader, extBits);
+			extended = 0 != (extBits & UINT32_C(0x80000000) );
+			_instruction.extended[ii] = DxbcInstruction::ExtendedType::Enum(extBits & UINT32_C(0x0000001f) );
+
+			switch (_instruction.extended[ii])
+			{
+			case DxbcInstruction::ExtendedType::SampleControls:
+				// 0       1       2       3
+				// 76543210765432107654321076543210
+				// .          zzzzyyyyxxxx    .....
+				//            ^   ^   ^
+				//            |   |   +------------- x
+				//            |   +----------------- y
+				//            +--------------------- z
+
+				_instruction.sampleOffsets[0] = (extBits & UINT32_C(0x00001e00) ) >>  9;
+				_instruction.sampleOffsets[1] = (extBits & UINT32_C(0x0001e000) ) >> 13;
+				_instruction.sampleOffsets[2] = (extBits & UINT32_C(0x001e0000) ) >> 17;
+				break;
+
+			case DxbcInstruction::ExtendedType::ResourceDim:
+				// 0       1       2       3
+				// 76543210765432107654321076543210
+				// .                          .....
+				//
+
+				_instruction.resourceTarget = DxbcResourceDim::Enum( (extBits & UINT32_C(0x000003e0) ) >>  6);
+				_instruction.resourceStride =                        (extBits & UINT32_C(0x0000f800) ) >> 11;
+				break;
+
+			case DxbcInstruction::ExtendedType::ResourceReturnType:
+				// 0       1       2       3
+				// 76543210765432107654321076543210
+				// .          3333222211110000.....
+				//            ^   ^   ^
+				//            |   |   +------------- x
+				//            |   +----------------- y
+				//            +--------------------- z
+
+				_instruction.resourceReturnTypes[0] = DxbcResourceReturnType::Enum( (extBits & UINT32_C(0x000001e0) ) >>   6);
+				_instruction.resourceReturnTypes[1] = DxbcResourceReturnType::Enum( (extBits & UINT32_C(0x00001e00) ) >>   9);
+				_instruction.resourceReturnTypes[2] = DxbcResourceReturnType::Enum( (extBits & UINT32_C(0x0001e000) ) >>  13);
+				_instruction.resourceReturnTypes[3] = DxbcResourceReturnType::Enum( (extBits & UINT32_C(0x001e0000) ) >>  17);
+				break;
+
+			default:
+				break;
+			}
+		}
+
+		switch (_instruction.opcode)
+		{
+			case DxbcOpcode::DCL_FUNCTION_TABLE:
+				{
+					uint32_t tableId;
+					size += read(_reader, tableId);
+
+					uint32_t num;
+					size += read(_reader, num);
+
+					for (uint32_t ii = 0; ii < num; ++ii)
+					{
+						uint32_t bodyId;
+						size += read(_reader, bodyId);
+					}
+				}
+				break;
+
+			case DxbcOpcode::DCL_INTERFACE:
+				{
+					uint32_t interfaceId;
+					size += read(_reader, interfaceId);
+
+					uint32_t num;
+					size += read(_reader, num);
+
+					BX_CHECK(false, "not implemented.");
+				}
+				break;
+
+			default:
+				break;
+		};
+
+		uint32_t currOp = 0;
+
+		const DxbcOpcodeInfo& info = s_dxbcOpcodeInfo[_instruction.opcode];
+		_instruction.numOperands = info.numOperands;
+		switch (info.numOperands)
+		{
+		case 6: size += read(_reader, _instruction.operand[currOp++]);
+		case 5: size += read(_reader, _instruction.operand[currOp++]);
+		case 4: size += read(_reader, _instruction.operand[currOp++]);
+		case 3: size += read(_reader, _instruction.operand[currOp++]);
+		case 2: size += read(_reader, _instruction.operand[currOp++]);
+		case 1: size += read(_reader, _instruction.operand[currOp++]);
+		case 0:
+			if (0 < info.numValues)
+			{
+				size += read(_reader, _instruction.value, info.numValues*sizeof(uint32_t) );
+			}
+			break;
+
+		default:
+			BX_CHECK(false, "Instruction %s with invalid number of operands %d (numValues %d)."
+					, getName(_instruction.opcode)
+					, info.numOperands
+					, info.numValues
+					);
+			break;
+		}
+
+		return size;
+	}
+
+	/// Serializes the opcode token of `_instruction`, followed by its operands
+	/// and its trailing raw values. Returns the sum of the values returned by
+	/// the individual writes.
+	///
+	/// NOTE(review): only the "extended" flag (bit 31) is emitted; no extended
+	/// opcode tokens are written, and CUSTOMDATA has no dedicated path here.
+	int32_t write(bx::WriterI* _writer, const DxbcInstruction& _instruction)
+	{
+		// Bits 0..10 carry the opcode, bits 24..30 the instruction length.
+		uint32_t opToken = (_instruction.opcode) & UINT32_C(0x000007ff);
+		opToken |= (_instruction.length << 24) & UINT32_C(0x7f000000);
+
+		// Bit 31 is set whenever the first extended slot is in use.
+		if (DxbcInstruction::ExtendedType::Count != _instruction.extended[0])
+		{
+			opToken |= UINT32_C(0x80000000);
+		}
+
+		// Opcode specific control bits. The boolean flags occupy consecutive
+		// bits starting at bit 11 (0x00000800), in the order listed below.
+		switch (_instruction.opcode)
+		{
+			case DxbcOpcode::DCL_CONSTANT_BUFFER:
+				if (_instruction.allowRefactoring)
+				{
+					opToken |= UINT32_C(0x00000800);
+				}
+				break;
+
+			case DxbcOpcode::DCL_GLOBAL_FLAGS:
+				{
+					const bool globalFlags[] =
+					{
+						_instruction.allowRefactoring,
+						_instruction.fp64,
+						_instruction.earlyDepth,
+						_instruction.enableBuffers,
+						_instruction.skipOptimization,
+						_instruction.enableMinPrecision,
+						_instruction.enableDoubleExtensions,
+						_instruction.enableShaderExtensions,
+					};
+
+					for (uint32_t bit = 0; bit < 8; ++bit)
+					{
+						if (globalFlags[bit])
+						{
+							opToken |= UINT32_C(0x00000800) << bit;
+						}
+					}
+				}
+				break;
+
+			case DxbcOpcode::DCL_INPUT_PS:
+				opToken |= (_instruction.interpolation << 11) & UINT32_C(0x0000f800);
+				break;
+
+			case DxbcOpcode::DCL_RESOURCE:
+				opToken |= (_instruction.srv     << 11) & UINT32_C(0x0000f800);
+				opToken |= (_instruction.samples << 16) & UINT32_C(0x007f0000);
+				break;
+
+			case DxbcOpcode::DCL_SAMPLER:
+				if (_instruction.shadow)
+				{
+					opToken |= UINT32_C(0x00000800);
+				}
+
+				if (_instruction.mono)
+				{
+					opToken |= UINT32_C(0x00001000);
+				}
+				break;
+
+			case DxbcOpcode::SYNC:
+				{
+					const bool syncFlags[] =
+					{
+						_instruction.threadsInGroup,
+						_instruction.sharedMemory,
+						_instruction.uavGroup,
+						_instruction.uavGlobal,
+					};
+
+					for (uint32_t bit = 0; bit < 4; ++bit)
+					{
+						if (syncFlags[bit])
+						{
+							opToken |= UINT32_C(0x00000800) << bit;
+						}
+					}
+				}
+				break;
+
+			default:
+				// Generic instructions: return type, saturate and test-not-zero.
+				// The precise mask is not stored in DxbcInstruction and is not written.
+				opToken |= (_instruction.retType << 11) & UINT32_C(0x00001800);
+
+				if (_instruction.saturate)
+				{
+					opToken |= UINT32_C(0x00002000);
+				}
+
+				if (0 != _instruction.testNZ)
+				{
+					opToken |= UINT32_C(0x00040000);
+				}
+				break;
+		}
+
+		uint32_t written = 0;
+		written += bx::write(_writer, opToken);
+
+		// Operands follow the opcode token.
+		for (uint32_t idx = 0; idx < _instruction.numOperands; ++idx)
+		{
+			written += write(_writer, _instruction.operand[idx]);
+		}
+
+		// Raw trailing values; their count comes from the opcode table.
+		const DxbcOpcodeInfo& opInfo = s_dxbcOpcodeInfo[_instruction.opcode];
+		if (0 < opInfo.numValues)
+		{
+			written += bx::write(_writer, _instruction.value, opInfo.numValues*sizeof(uint32_t) );
+		}
+
+		return written;
+	}
+
+	/// Formats `_instruction` as text into `_out`.
+	///
+	/// @param _out Destination buffer.
+	/// @param _size Capacity of `_out`; each bx::snprintf call is given the
+	///   space left, clamped at 0.
+	/// @param _instruction Instruction to print.
+	/// @returns Sum of the values returned by the bx::snprintf calls.
+	int32_t toString(char* _out, int32_t _size, const DxbcInstruction& _instruction)
+	{
+		int32_t size = 0;
+
+		// Mnemonic plus the saturate / test-not-zero suffixes.
+		size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
+							, "%s%s%s"
+							, getName(_instruction.opcode)
+							, _instruction.saturate ? "_sat" : ""
+							, _instruction.testNZ ? "_nz" : ""
+							);
+
+		if (DxbcResourceDim::Unknown != _instruction.srv)
+		{
+			// Resource declaration: dimension name and the first raw value in hex.
+			size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
+								, " %s<%x>"
+								, s_dxbcSrvType[_instruction.srv]
+								, _instruction.value[0]
+								);
+		}
+		else if (0 < s_dxbcOpcodeInfo[_instruction.opcode].numValues)
+		{
+			size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
+								, " %d"
+								, _instruction.value[0]
+								);
+		}
+
+		for (uint32_t ii = 0; ii < _instruction.numOperands; ++ii)
+		{
+			const DxbcOperand& operand = _instruction.operand[ii];
+
+			// Brackets are printed when there is more than one index, or when
+			// the first index is not a plain immediate.
+			const bool array = false
+				|| 1 < operand.numAddrModes
+				|| DxbcOperandAddrMode::Imm32 != operand.addrMode[0]
+				;
+
+			size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
+								, "%s%s%s"
+								, 0 == ii ? " " : ", "
+								, operand.extended ? "*" : ""
+								, s_dxbcOperandType[operand.type]
+								);
+
+			switch (operand.type)
+			{
+			case DxbcOperandType::Imm32:
+			case DxbcOperandType::Imm64:
+				for (uint32_t jj = 0; jj < operand.num; ++jj)
+				{
+					// Immediates are printed as floats, reinterpreting the raw bits.
+					union { uint32_t i; float f; } cast = { operand.un.imm32[jj] };
+					size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
+										, "%s%f"
+										, 0 == jj ? "(" : ", "
+										, cast.f
+										);
+				}
+
+				size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
+									, ")"
+									);
+				break;
+
+			default:
+				break;
+			}
+
+			// When the first index is register + immediate it is printed by the
+			// loop below; otherwise it is printed here as a plain number.
+			const uint32_t first = DxbcOperandAddrMode::RegImm32 == operand.addrMode[0] ? 0 : 1;
+			if (0 == first)
+			{
+				size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
+									, "["
+									);
+			}
+			else
+			{
+				size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
+									, "%d%s"
+									, operand.regIndex[0]
+									, array ? "[" : ""
+									);
+			}
+
+			for (uint32_t jj = first; jj < operand.numAddrModes; ++jj)
+			{
+				switch (operand.addrMode[jj])
+				{
+				case DxbcOperandAddrMode::Imm32:
+					size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
+										, "%d"
+										, operand.regIndex[jj]
+										);
+					break;
+
+				case DxbcOperandAddrMode::Reg:
+					// The register number belongs to the sub-operand whose type
+					// name is printed, not to the enclosing operand.
+					size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
+										, "%s%d"
+										, s_dxbcOperandType[operand.subOperand[jj].type]
+										, operand.subOperand[jj].regIndex
+										);
+					break;
+
+				case DxbcOperandAddrMode::RegImm32:
+					// "<immediate> + <register>": the immediate is the operand's own
+					// index, the register number comes from the sub-operand.
+					size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
+										, "%d + %s%d"
+										, operand.regIndex[jj]
+										, s_dxbcOperandType[operand.subOperand[jj].type]
+										, operand.subOperand[jj].regIndex
+										);
+					break;
+
+				default:
+					break;
+				}
+			}
+
+			size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
+								, "%s"
+								, array ? "]" : ""
+								);
+
+			switch (operand.mode)
+			{
+			case DxbcOperandMode::Mask:
+				// A full (0xf) or empty (0) mask is not printed.
+				if (0xf > operand.modeBits
+				&&  0   < operand.modeBits)
+				{
+					size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
+										, ".%s%s%s%s"
+										, 0 == (operand.modeBits & 1) ? "" : "x"
+										, 0 == (operand.modeBits & 2) ? "" : "y"
+										, 0 == (operand.modeBits & 4) ? "" : "z"
+										, 0 == (operand.modeBits & 8) ? "" : "w"
+										);
+				}
+				break;
+
+			case DxbcOperandMode::Swizzle:
+				// 0xe4 selects x, y, z, w in order (identity), so it is omitted.
+				if (0xe4 != operand.modeBits)
+				{
+					size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
+										, ".%c%c%c%c"
+										, "xyzw"[(operand.modeBits   )&0x3]
+										, "xyzw"[(operand.modeBits>>2)&0x3]
+										, "xyzw"[(operand.modeBits>>4)&0x3]
+										, "xyzw"[(operand.modeBits>>6)&0x3]
+										);
+				}
+				break;
+
+			case DxbcOperandMode::Scalar:
+				size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
+									, ".%c"
+									, "xyzw"[operand.modeBits]
+									);
+				break;
+
+			default:
+				break;
+			}
+
+		}
+
+		return size;
+	}
+
+	/// Reads a signature: element count, key, then one fixed-size record per
+	/// element. Every element read is appended to `_signature.elements`.
+	/// Returns the sum of the values returned by the individual reads
+	/// (the name strings fetched through readString are not counted).
+	int32_t read(bx::ReaderSeekerI* _reader, DxbcSignature& _signature)
+	{
+		// Name offsets stored in the records are applied relative to the
+		// reader position at entry.
+		const int64_t sigStart = bx::seek(_reader);
+
+		int32_t total = 0;
+
+		uint32_t numElements;
+		total += bx::read(_reader, numElements);
+		total += bx::read(_reader, _signature.key);
+
+		for (uint32_t remaining = numElements; 0 < remaining; --remaining)
+		{
+			uint32_t nameOffset;
+			total += bx::read(_reader, nameOffset);
+
+			char nameBuf[DXBC_MAX_NAME_STRING];
+			readString(_reader, sigStart + nameOffset, nameBuf);
+
+			DxbcSignature::Element elem;
+			elem.name = nameBuf;
+
+			total += bx::read(_reader, elem.semanticIndex);
+			total += bx::read(_reader, elem.valueType);
+			total += bx::read(_reader, elem.componentType);
+			total += bx::read(_reader, elem.registerIndex);
+			total += bx::read(_reader, elem.mask);
+			total += bx::read(_reader, elem.readWriteMask);
+			total += bx::read(_reader, elem.stream);
+
+			// One byte of padding closes the record.
+			uint8_t unusedPad;
+			total += bx::read(_reader, unusedPad);
+
+			_signature.elements.push_back(elem);
+		}
+
+		return total;
+	}
+
+	/// Writes a signature: element count, key, one 24-byte record per element,
+	/// then the table of unique element names, padded with 0xab bytes up to a
+	/// multiple of four. Returns the sum of the values returned by the writes.
+	int32_t write(bx::WriterI* _writer, const DxbcSignature& _signature)
+	{
+		typedef stl::unordered_map<stl::string, uint32_t> NameOffsetMap;
+
+		const uint32_t numElements = uint32_t(_signature.elements.size() );
+
+		int32_t total = 0;
+		total += bx::write(_writer, numElements);
+		total += bx::write(_writer, _signature.key);
+
+		// Offset assigned to each distinct name; identical names share one string.
+		NameOffsetMap offsets;
+
+		// The string table starts right after the 8-byte header and the records.
+		uint32_t nextNameOffset = numElements * 24 + 8;
+		const uint8_t zeroPad = 0;
+
+		for (uint32_t idx = 0; idx < numElements; ++idx)
+		{
+			const DxbcSignature::Element& elem = _signature.elements[idx];
+
+			uint32_t thisOffset;
+			NameOffsetMap::iterator known = offsets.find(elem.name);
+			if (known != offsets.end() )
+			{
+				thisOffset = known->second;
+			}
+			else
+			{
+				thisOffset = nextNameOffset;
+				offsets.insert(stl::make_pair(elem.name, nextNameOffset) );
+				nextNameOffset += uint32_t(elem.name.size() + 1);
+			}
+			total += bx::write(_writer, thisOffset);
+
+			total += bx::write(_writer, elem.semanticIndex);
+			total += bx::write(_writer, elem.valueType);
+			total += bx::write(_writer, elem.componentType);
+			total += bx::write(_writer, elem.registerIndex);
+			total += bx::write(_writer, elem.mask);
+			total += bx::write(_writer, elem.readWriteMask);
+			total += bx::write(_writer, elem.stream);
+			total += bx::write(_writer, zeroPad);
+		}
+
+		// Emit each distinct name once, in order of first use. Erasing the map
+		// entry marks the name as already written.
+		uint32_t namesLen = 0;
+		for (uint32_t idx = 0; idx < numElements; ++idx)
+		{
+			const stl::string& name = _signature.elements[idx].name;
+
+			NameOffsetMap::iterator pending = offsets.find(name);
+			if (pending == offsets.end() )
+			{
+				continue;
+			}
+
+			offsets.erase(pending);
+
+			const uint32_t nameBytes = uint32_t(name.size() + 1);
+			total += bx::write(_writer, name.c_str(), nameBytes);
+			namesLen += nameBytes;
+		}
+
+		// align 4 bytes
+		total += bx::writeRep(_writer, 0xab, (namesLen+3)/4*4 - namesLen);
+
+		return total;
+	}
+
+	/// Reads a shader chunk payload: version dword, length dword (in 32-bit
+	/// tokens), then the instruction tokens, which are stored in
+	/// `_shader.byteCode`. Returns the sum of the values returned by the reads.
+	int32_t read(bx::ReaderSeekerI* _reader, DxbcShader& _shader)
+	{
+		int32_t size = 0;
+
+		size += bx::read(_reader, _shader.version);
+
+		uint32_t bcLength = 0;
+		size += bx::read(_reader, bcLength);
+
+		// The stored length also counts the version and length dwords
+		// (write() adds them back as "+ 2"). Guard the subtraction so that a
+		// malformed length below 2 yields an empty shader instead of wrapping
+		// around to a huge allocation.
+		const uint32_t numTokens = 2 < bcLength ? bcLength - 2 : 0;
+		const uint32_t len = numTokens * uint32_t(sizeof(uint32_t) );
+		_shader.byteCode.resize(len);
+		size += bx::read(_reader, _shader.byteCode.data(), len);
+
+		return size;
+	}
+
+	/// Writes a shader chunk payload: version dword, length in 32-bit tokens
+	/// (byte code tokens plus two), then the byte code itself. Returns the sum
+	/// of the values returned by the writes.
+	int32_t write(bx::WriterI* _writer, const DxbcShader& _shader)
+	{
+		const uint32_t numBytes  = uint32_t(_shader.byteCode.size() );
+		const uint32_t numTokens = numBytes / sizeof(uint32_t) + 2;
+
+		int32_t total = bx::write(_writer, _shader.version);
+		total += bx::write(_writer, numTokens);
+		total += bx::write(_writer, _shader.byteCode.data(), numBytes);
+
+		return total;
+	}
+
+#define DXBC_CHUNK_HEADER           BX_MAKEFOURCC('D', 'X', 'B', 'C')
+#define DXBC_CHUNK_SHADER           BX_MAKEFOURCC('S', 'H', 'D', 'R')
+#define DXBC_CHUNK_INPUT_SIGNATURE  BX_MAKEFOURCC('I', 'S', 'G', 'N')
+#define DXBC_CHUNK_OUTPUT_SIGNATURE BX_MAKEFOURCC('O', 'S', 'G', 'N')
+
+	/// Reads a DXBC container: the header, then every chunk listed in the
+	/// chunk offset table. Shader and signature chunks are decoded into
+	/// `_dxbc`; other chunks are skipped.
+	///
+	/// @returns Accumulated byte count: values returned by the reads plus the
+	///   declared size of each skipped chunk.
+	int32_t read(bx::ReaderSeekerI* _reader, DxbcContext& _dxbc)
+	{
+		int32_t size = 0;
+
+		// Chunk offsets are stored relative to the start of the container
+		// (see write(), which subtracts its own start offset), so remember
+		// where the container begins instead of assuming stream offset 0.
+		const int64_t dxbcOffset = bx::seek(_reader);
+
+		size += bx::read(_reader, _dxbc.header);
+
+		for (uint32_t ii = 0; ii < _dxbc.header.numChunks; ++ii)
+		{
+			// Entry ii of the offset table that directly follows the header.
+			bx::seek(_reader, dxbcOffset + int64_t(sizeof(DxbcContext::Header) + ii*sizeof(uint32_t) ), bx::Whence::Begin);
+
+			uint32_t chunkOffset;
+			size += bx::read(_reader, chunkOffset);
+
+			bx::seek(_reader, dxbcOffset + chunkOffset, bx::Whence::Begin);
+
+			// Each chunk starts with its fourcc and payload size.
+			uint32_t fourcc;
+			size += bx::read(_reader, fourcc);
+
+			uint32_t chunkSize;
+			size += bx::read(_reader, chunkSize);
+
+			switch (fourcc)
+			{
+			case DXBC_CHUNK_SHADER:
+			case BX_MAKEFOURCC('S', 'H', 'E', 'X'):
+				size += read(_reader, _dxbc.shader);
+				break;
+
+			case BX_MAKEFOURCC('I', 'S', 'G', '1'):
+			case DXBC_CHUNK_INPUT_SIGNATURE:
+				size += read(_reader, _dxbc.inputSignature);
+				break;
+
+			case BX_MAKEFOURCC('O', 'S', 'G', '1'):
+			case BX_MAKEFOURCC('O', 'S', 'G', '5'):
+			case DXBC_CHUNK_OUTPUT_SIGNATURE:
+				size += read(_reader, _dxbc.outputSignature);
+				break;
+
+			// Known chunks that are not decoded; only their size is accounted for.
+			case BX_MAKEFOURCC('R', 'D', 'E', 'F'):
+			case BX_MAKEFOURCC('I', 'F', 'C', 'E'):
+			case BX_MAKEFOURCC('P', 'C', 'S', 'G'):
+			case BX_MAKEFOURCC('S', 'T', 'A', 'T'):
+			case BX_MAKEFOURCC('S', 'F', 'I', '0'):
+			case BX_MAKEFOURCC('P', 'S', 'O', '1'):
+			case BX_MAKEFOURCC('P', 'S', 'O', '2'):
+				size += chunkSize;
+				break;
+
+			default:
+				size += chunkSize;
+				BX_CHECK(false, "UNKNOWN FOURCC %c%c%c%c %d"
+					, ( (char*)&fourcc)[0]
+					, ( (char*)&fourcc)[1]
+					, ( (char*)&fourcc)[2]
+					, ( (char*)&fourcc)[3]
+					, size
+					);
+				break;
+			}
+		}
+
+		return size;
+	}
+
+	/// Writes a DXBC container with three chunks: input signature, output
+	/// signature and shader. The hash field is written as 16 zero bytes. The
+	/// total size, the chunk offset table and the per-chunk sizes are written
+	/// as placeholders first and patched once the chunks have been emitted;
+	/// the writer is left positioned at the end of the container.
+	///
+	/// @returns Total number of bytes the container occupies.
+	int32_t write(bx::WriterSeekerI* _writer, const DxbcContext& _dxbc)
+	{
+		int32_t size = 0;
+
+		// Start of the container; chunk offsets are stored relative to it.
+		int64_t dxbcOffset = bx::seek(_writer);
+		size += bx::write(_writer, DXBC_CHUNK_HEADER);
+
+		// Hash placeholder.
+		size += bx::writeRep(_writer, 0, 16);
+
+		size += bx::write(_writer, UINT32_C(1) );
+
+		// Total size placeholder, patched below.
+		int64_t sizeOffset = bx::seek(_writer);
+		size += bx::writeRep(_writer, 0, 4);
+
+		uint32_t numChunks = 3;
+		size += bx::write(_writer, numChunks);
+
+		// Chunk offset table placeholder, patched below.
+		int64_t chunksOffsets = bx::seek(_writer);
+		size += bx::writeRep(_writer, 0, numChunks*sizeof(uint32_t) );
+
+		uint32_t chunkOffset[3];
+		uint32_t chunkSize[3];
+
+		// Each chunk: fourcc, size placeholder, payload.
+		chunkOffset[0] = uint32_t(bx::seek(_writer) - dxbcOffset);
+		size += write(_writer, DXBC_CHUNK_INPUT_SIGNATURE);
+		size += write(_writer, UINT32_C(0) );
+		chunkSize[0] = write(_writer, _dxbc.inputSignature);
+
+		chunkOffset[1] = uint32_t(bx::seek(_writer) - dxbcOffset);
+		size += write(_writer, DXBC_CHUNK_OUTPUT_SIGNATURE);
+		size += write(_writer, UINT32_C(0) );
+		chunkSize[1] = write(_writer, _dxbc.outputSignature);
+
+		chunkOffset[2] = uint32_t(bx::seek(_writer) - dxbcOffset);
+		size += write(_writer, DXBC_CHUNK_SHADER);
+		size += write(_writer, UINT32_C(0) );
+		chunkSize[2] = write(_writer, _dxbc.shader);
+
+		size += 0
+			+ chunkSize[0]
+			+ chunkSize[1]
+			+ chunkSize[2]
+			;
+
+		int64_t eof = bx::seek(_writer);
+
+		bx::seek(_writer, sizeOffset, bx::Whence::Begin);
+		bx::write(_writer, size);
+
+		bx::seek(_writer, chunksOffsets, bx::Whence::Begin);
+		bx::write(_writer, chunkOffset, sizeof(chunkOffset) );
+
+		for (uint32_t ii = 0; ii < BX_COUNTOF(chunkOffset); ++ii)
+		{
+			// chunkOffset[] is relative to the container start, so add the
+			// container's position back before seeking from the beginning of
+			// the stream. +4 skips the chunk's fourcc.
+			bx::seek(_writer, dxbcOffset + chunkOffset[ii] + 4, bx::Whence::Begin);
+			bx::write(_writer, chunkSize[ii]);
+		}
+
+		bx::seek(_writer, eof, bx::Whence::Begin);
+
+		return size;
+	}
+
+	/// Decodes the instructions in `_src.byteCode` one after another and calls
+	/// `_fn` for each of them.
+	///
+	/// @param _src Shader whose byte code is walked.
+	/// @param _fn Callback; receives the byte offset of the instruction inside
+	///   the byte code, the decoded instruction and `_userData`.
+	/// @param _userData Passed through to `_fn` unchanged.
+	void parse(const DxbcShader& _src, DxbcParseFn _fn, void* _userData)
+	{
+		bx::MemoryReader reader(_src.byteCode.data(), uint32_t(_src.byteCode.size() ) );
+
+		for (uint32_t token = 0, numTokens = uint32_t(_src.byteCode.size() / sizeof(uint32_t) ); token < numTokens;)
+		{
+			DxbcInstruction instruction;
+			uint32_t size = read(&reader, instruction);
+
+			BX_CHECK(size/4 == instruction.length, "read %d, expected %d", size/4, instruction.length);
+
+			_fn(token * sizeof(uint32_t), instruction, _userData);
+
+			// A zero-length instruction would never advance `token`; stop
+			// instead of looping forever on malformed byte code.
+			if (0 == instruction.length)
+			{
+				break;
+			}
+
+			token += instruction.length;
+		}
+	}
+
+	/// Decodes every instruction of `_src`, lets `_fn` modify it, re-encodes it
+	/// and stores the resulting byte code in `_dst`.
+	///
+	/// @param _dst Receives the re-encoded byte code.
+	/// @param _src Shader whose byte code is filtered.
+	/// @param _fn Callback invoked with each decoded instruction and `_userData`.
+	/// @param _userData Passed through to `_fn` unchanged.
+	void filter(DxbcShader& _dst, const DxbcShader& _src, DxbcFilterFn _fn, void* _userData)
+	{
+		bx::MemoryReader reader(_src.byteCode.data(), uint32_t(_src.byteCode.size() ) );
+
+		bx::CrtAllocator r;
+		bx::MemoryBlock mb(&r);
+		bx::MemoryWriter writer(&mb);
+
+		for (uint32_t token = 0, numTokens = uint32_t(_src.byteCode.size() / sizeof(uint32_t) ); token < numTokens;)
+		{
+			DxbcInstruction instruction;
+			uint32_t size = read(&reader, instruction);
+			BX_CHECK(size/4 == instruction.length, "read %d, expected %d", size/4, instruction.length);
+
+			_fn(instruction, _userData);
+
+			write(&writer, instruction);
+
+			// A zero-length instruction would never advance `token`; stop
+			// instead of looping forever on malformed byte code.
+			if (0 == instruction.length)
+			{
+				break;
+			}
+
+			token += instruction.length;
+		}
+
+		uint8_t* data = (uint8_t*)mb.more();
+		uint32_t size = uint32_t(bx::getSize(&writer) );
+
+		// resize(), not reserve(): reserve() leaves byteCode.size() unchanged,
+		// so the copied bytes would lie outside the vector's valid range and
+		// be ignored by code that relies on size() (e.g. write(DxbcShader) ).
+		_dst.byteCode.resize(size);
+		if (0 < size)
+		{
+			memcpy(_dst.byteCode.data(), data, size);
+		}
+	}
+
+} // namespace bgfx

+ 608 - 0
src/shader_dxbc.h

@@ -0,0 +1,608 @@
+/*
+ * Copyright 2011-2015 Branimir Karadzic. All rights reserved.
+ * License: http://www.opensource.org/licenses/BSD-2-Clause
+ */
+
+#ifndef BGFX_SHADER_DXBC_H
+#define BGFX_SHADER_DXBC_H
+
+#include <bx/readerwriter.h>
+
+namespace bgfx
+{
+	struct DxbcOpcode // Instruction opcodes; read() takes the value from the low 11 bits of the opcode token and uses it to index s_dxbcOpcodeInfo.
+	{
+		enum Enum
+		{
+			ADD,
+			AND,
+			BREAK,
+			BREAKC,
+			CALL,
+			CALLC,
+			CASE,
+			CONTINUE,
+			CONTINUEC,
+			CUT,
+			DEFAULT,
+			DERIV_RTX,
+			DERIV_RTY,
+			DISCARD,
+			DIV,
+			DP2,
+			DP3,
+			DP4,
+			ELSE,
+			EMIT,
+			EMITTHENCUT,
+			ENDIF,
+			ENDLOOP,
+			ENDSWITCH,
+			EQ,
+			EXP,
+			FRC,
+			FTOI,
+			FTOU,
+			GE,
+			IADD,
+			IF,
+			IEQ,
+			IGE,
+			ILT,
+			IMAD,
+			IMAX,
+			IMIN,
+			IMUL,
+			INE,
+			INEG,
+			ISHL,
+			ISHR,
+			ITOF,
+			LABEL,
+			LD,
+			LD_MS,
+			LOG,
+			LOOP,
+			LT,
+			MAD,
+			MIN,
+			MAX,
+			CUSTOMDATA, // read() handles this one separately: it reads its own length dword and returns early.
+			MOV,
+			MOVC,
+			MUL,
+			NE,
+			NOP,
+			NOT,
+			OR,
+			RESINFO,
+			RET,
+			RETC,
+			ROUND_NE,
+			ROUND_NI,
+			ROUND_PI,
+			ROUND_Z,
+			RSQ,
+			SAMPLE,
+			SAMPLE_C,
+			SAMPLE_C_LZ,
+			SAMPLE_L,
+			SAMPLE_D,
+			SAMPLE_B,
+			SQRT,
+			SWITCH,
+			SINCOS,
+			UDIV,
+			ULT,
+			UGE,
+			UMUL,
+			UMAD,
+			UMAX,
+			UMIN,
+			USHR,
+			UTOF,
+			XOR,
+			DCL_RESOURCE,
+			DCL_CONSTANT_BUFFER,
+			DCL_SAMPLER,
+			DCL_INDEX_RANGE,
+			DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY,
+			DCL_GS_INPUT_PRIMITIVE,
+			DCL_MAX_OUTPUT_VERTEX_COUNT,
+			DCL_INPUT,
+			DCL_INPUT_SGV,
+			DCL_INPUT_SIV,
+			DCL_INPUT_PS,
+			DCL_INPUT_PS_SGV,
+			DCL_INPUT_PS_SIV,
+			DCL_OUTPUT,
+			DCL_OUTPUT_SGV,
+			DCL_OUTPUT_SIV,
+			DCL_TEMPS,
+			DCL_INDEXABLE_TEMP,
+			DCL_GLOBAL_FLAGS,
+
+			UnknownD3D10, // NOTE(review): presumably a reserved slot closing the D3D10.0 opcode range — confirm.
+			LOD,
+			GATHER4,
+			SAMPLE_POS,
+			SAMPLE_INFO,
+
+			UnknownD3D10_1, // NOTE(review): presumably a reserved slot closing the D3D10.1 opcode range — confirm.
+			HS_DECLS,
+			HS_CONTROL_POINT_PHASE,
+			HS_FORK_PHASE,
+			HS_JOIN_PHASE,
+			EMIT_STREAM,
+			CUT_STREAM,
+			EMITTHENCUT_STREAM,
+			INTERFACE_CALL,
+			BUFINFO,
+			DERIV_RTX_COARSE,
+			DERIV_RTX_FINE,
+			DERIV_RTY_COARSE,
+			DERIV_RTY_FINE,
+			GATHER4_C,
+			GATHER4_PO,
+			GATHER4_PO_C,
+			RCP,
+			F32TOF16,
+			F16TOF32,
+			UADDC,
+			USUBB,
+			COUNTBITS,
+			FIRSTBIT_HI,
+			FIRSTBIT_LO,
+			FIRSTBIT_SHI,
+			UBFE,
+			IBFE,
+			BFI,
+			BFREV,
+			SWAPC,
+			DCL_STREAM,
+			DCL_FUNCTION_BODY,
+			DCL_FUNCTION_TABLE,
+			DCL_INTERFACE,
+			DCL_INPUT_CONTROL_POINT_COUNT,
+			DCL_OUTPUT_CONTROL_POINT_COUNT,
+			DCL_TESS_DOMAIN,
+			DCL_TESS_PARTITIONING,
+			DCL_TESS_OUTPUT_PRIMITIVE,
+			DCL_HS_MAX_TESSFACTOR,
+			DCL_HS_FORK_PHASE_INSTANCE_COUNT,
+			DCL_HS_JOIN_PHASE_INSTANCE_COUNT,
+			DCL_THREAD_GROUP,
+			DCL_UNORDERED_ACCESS_VIEW_TYPED,
+			DCL_UNORDERED_ACCESS_VIEW_RAW,
+			DCL_UNORDERED_ACCESS_VIEW_STRUCTURED,
+			DCL_THREAD_GROUP_SHARED_MEMORY_RAW,
+			DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED,
+			DCL_RESOURCE_RAW,
+			DCL_RESOURCE_STRUCTURED,
+			LD_UAV_TYPED,
+			STORE_UAV_TYPED,
+			LD_RAW,
+			STORE_RAW,
+			LD_STRUCTURED,
+			STORE_STRUCTURED,
+			ATOMIC_AND,
+			ATOMIC_OR,
+			ATOMIC_XOR,
+			ATOMIC_CMP_STORE,
+			ATOMIC_IADD,
+			ATOMIC_IMAX,
+			ATOMIC_IMIN,
+			ATOMIC_UMAX,
+			ATOMIC_UMIN,
+			IMM_ATOMIC_ALLOC,
+			IMM_ATOMIC_CONSUME,
+			IMM_ATOMIC_IADD,
+			IMM_ATOMIC_AND,
+			IMM_ATOMIC_OR,
+			IMM_ATOMIC_XOR,
+			IMM_ATOMIC_EXCH,
+			IMM_ATOMIC_CMP_EXCH,
+			IMM_ATOMIC_IMAX,
+			IMM_ATOMIC_IMIN,
+			IMM_ATOMIC_UMAX,
+			IMM_ATOMIC_UMIN,
+			SYNC,
+			DADD,
+			DMAX,
+			DMIN,
+			DMUL,
+			DEQ,
+			DGE,
+			DLT,
+			DNE,
+			DMOV,
+			DMOVC,
+			DTOF,
+			FTOD,
+			EVAL_SNAPPED,
+			EVAL_SAMPLE_INDEX,
+			EVAL_CENTROID,
+			DCL_GS_INSTANCE_COUNT,
+			ABORT,
+			DEBUG_BREAK,
+
+			UnknownD3D11, // NOTE(review): presumably a reserved slot closing the D3D11.0 opcode range — confirm.
+			DDIV,
+			DFMA,
+			DRCP,
+			MSAD,
+			DTOI,
+			DTOU,
+			ITOD,
+			UTOD,
+
+			Count // Number of enumerators above; not an opcode itself.
+		};
+	};
+
+	const char* getName(DxbcOpcode::Enum _opcode); // Name string for `_opcode`; toString() prints it as the instruction mnemonic.
+
+	struct DxbcBuiltin // Type of DxbcSignature::Element::valueType.
+	{
+		// D3D_NAME
+		// https://msdn.microsoft.com/en-us/library/windows/desktop/ff728724%28v=vs.85%29.aspx
+		// mesa/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/svs.txt
+		enum Enum
+		{
+			Undefined,
+			Position,
+			ClipDistance,
+			CullDistance,
+			RenderTargetArrayIndex,
+			ViewportArrayIndex,
+			VertexId,
+			PrimitiveId,
+			InstanceId,
+			IsFrontFace,
+			SampleIndex,
+			FinalQuadUEq0EdgeTessFactor,
+			FinalQuadVEq0EdgeTessFactor,
+			FinalQuadUEq1EdgeTessFactor,
+			FinalQuadVEq1EdgeTessFactor,
+			FinalQuadUInsideTessFactor,
+			FinalQuadVInsideTessFactor,
+			FinalTriUEq0EdgeTessFactor,
+			FinalTriVEq0EdgeTessFactor,
+			FinalTriWEq0EdgeTessFactor,
+			FinalTriInsideTessFactor,
+			FinalLineDetailTessFactor,
+			FinalLineDensityTessFactor,
+			Target = 64, // Explicit value; the enumerators below continue from 65.
+			Depth,
+			Coverage,
+			DepthGreaterEqual,
+			DepthLessEqual,
+			StencilRef,
+			InnerCoverage,
+		};
+	};
+
+	struct DxbcResourceDim // Resource dimension; read() extracts it from DCL_RESOURCE tokens into DxbcInstruction::srv.
+	{
+		// D3D_SRV_DIMENSION
+		// https://msdn.microsoft.com/en-us/library/windows/desktop/ff728736%28v=vs.85%29.aspx
+		// mesa/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/targets.txt
+		enum Enum
+		{
+			Unknown, // Value DxbcInstruction::srv is reset to before decoding; toString() prints no resource type for it.
+			Buffer,
+			Texture1D,
+			Texture2D,
+			Texture2DMS,
+			Texture3D,
+			TextureCube,
+			Texture1DArray,
+			Texture2DArray,
+			Texture2DMSArray,
+			TextureCubearray,
+			RawBuffer,
+			StructuredBuffer,
+
+			Count // Number of enumerators above.
+		};
+	};
+
+	struct DxbcInterpolation // Interpolation mode; read() extracts it from DCL_INPUT_PS tokens (bits 11..15).
+	{
+		enum Enum
+		{
+			Unknown,
+			Constant,
+			Linear,
+			LinearCentroid,
+			LinearNoPerspective,
+			LinearNoPerspectiveCentroid,
+			LinearSample,
+			LinearNoPerspectiveSample,
+
+			Count // Number of enumerators above.
+		};
+	};
+
+	struct DxbcResourceReturnType // Type of DxbcInstruction::retType and DxbcInstruction::resourceReturnTypes.
+	{
+		enum Enum
+		{
+			Unorm,
+			Snorm,
+			Sint,
+			Uint,
+			Float,
+			Mixed,
+			Double,
+			Continued,
+			Unused, // Value DxbcInstruction::retType is reset to before read() decodes a token.
+
+			Count // Number of enumerators above.
+		};
+	};
+
+	struct DxbcComponentType // Type of DxbcSignature::Element::componentType.
+	{
+		enum Enum
+		{
+			Unknown,
+			Uint32,
+			Int32,
+			Float,
+
+			Count // Number of enumerators above.
+		};
+	};
+
+	struct DxbcPrecision // NOTE(review): not referenced by the code visible here — presumably operand minimum-precision modes; confirm.
+	{
+		enum Enum
+		{
+			Default,
+			Half,
+			Float2_8,
+			Reserved,
+			Int16,
+			Uint16,
+			Any16 = 0xf0, // Explicit values; not part of the sequential numbering above.
+			Any10 = 0xf1,
+		};
+	};
+
+	struct DxbcOperandType // Operand type; toString() uses it to index the s_dxbcOperandType name table.
+	{
+		enum Enum
+		{
+			Temp,
+			Input,
+			Output,
+			TempArray,
+			Imm32, // toString() prints the operand's immediate values for Imm32 and Imm64.
+			Imm64,
+			Sampler,
+			Resource,
+			ConstantBuffer,
+			ImmConstantBuffer,
+			Label,
+			PrimitiveID,
+			OutputDepth,
+			Null,
+			Rasterizer,
+			CoverageMask,
+			Stream,
+			FunctionBody,
+			FunctionTable,
+			Interface,
+			FunctionInput,
+			FunctionOutput,
+			OutputControlPointId,
+			InputForkInstanceId,
+			InputJoinInstanceId,
+			InputControlPoint,
+			OutputControlPoint,
+			InputPatchConstant,
+			InputDomainPoint,
+			ThisPointer,
+			UnorderedAccessView,
+			ThreadGroupSharedMemory,
+			InputThreadId,
+			InputThreadGroupId,
+			InputThreadIdInGroup,
+			InputCoverageMask,
+			InputThreadIdInGroupFlattened,
+			InputGsInstanceId,
+			OutputDepthGreaterEqual,
+			OutputDepthLessEqual,
+			CycleCounter,
+
+			Count // Number of enumerators above.
+		};
+	};
+
+	struct DxbcOperandAddrMode // Addressing mode of one operand index (values stored in DxbcOperand::addrMode).
+	{
+		enum Enum
+		{
+			Imm32, // toString() prints the index as a plain number.
+			Imm64,
+			Reg, // toString() prints the index as a register (sub-operand).
+			RegImm32, // toString() prints the index as "<number> + <register>".
+			RegImm64,
+
+			Count // Number of enumerators above.
+		};
+	};
+
+	struct DxbcOperandMode // Selects how toString() interprets DxbcOperand::modeBits.
+	{
+		enum Enum
+		{
+			Mask, // One bit per component: 1 = x, 2 = y, 4 = z, 8 = w.
+			Swizzle, // Four 2-bit component selectors, lowest bits first.
+			Scalar, // Index of a single component.
+
+			Count // Number of enumerators above.
+		};
+	};
+
+	struct DxbcSubOperand // Register operand nested inside an operand index (see DxbcOperand::subOperand).
+	{
+		DxbcOperandType::Enum type; // toString() prints this type's name for Reg / RegImm32 indices.
+		uint8_t mode;
+		uint8_t modeBits;
+		uint8_t num;
+		uint8_t numAddrModes;
+		uint8_t addrMode;
+		uint32_t regIndex;
+	};
+
+	struct DxbcOperand // One decoded instruction operand.
+	{
+		DxbcOperandType::Enum type;
+		DxbcOperandMode::Enum mode; // Selects how modeBits is interpreted (mask, swizzle or scalar).
+		uint8_t modeBits;
+		uint8_t num; // Number of immediate values toString() prints from un.imm32.
+		bool extended; // toString() prefixes the operand with "*" when set.
+		uint32_t extBits;
+
+		uint8_t numAddrModes; // Number of entries used in addrMode / regIndex / subOperand.
+		uint8_t addrMode[3]; // DxbcOperandAddrMode::Enum value per index.
+		uint32_t regIndex[3];
+		DxbcSubOperand subOperand[3];
+
+		union
+		{
+			uint32_t imm32[4];
+			uint64_t imm64[4];
+		} un;
+	};
+
+	struct DxbcInstruction // One decoded instruction: opcode token fields, extended token fields, operands and raw values.
+	{
+		struct ExtendedType
+		{
+			enum Enum
+			{
+				Empty,
+				SampleControls,
+				ResourceDim,
+				ResourceReturnType,
+
+				Count // Also used as the "no extended token" marker in extended[0].
+			};
+		};
+
+		DxbcOpcode::Enum opcode;
+		uint32_t value[3]; // Raw trailing values; s_dxbcOpcodeInfo[opcode].numValues of them are read and written.
+		uint32_t length; // Instruction length in 32-bit tokens; parse() and filter() advance by it.
+		uint8_t numOperands; // Copied from s_dxbcOpcodeInfo[opcode].numOperands by read().
+		ExtendedType::Enum extended[3]; // Types of the extended opcode tokens read; extended[0] == Count when there are none.
+
+		// Decoded for DCL_RESOURCE.
+		DxbcResourceDim::Enum srv;
+		uint8_t samples;
+
+		// Decoded for DCL_INPUT_PS.
+		DxbcInterpolation::Enum interpolation;
+
+		// Decoded for DCL_SAMPLER.
+		bool shadow;
+		bool mono;
+
+		// Decoded for DCL_GLOBAL_FLAGS (allowRefactoring also for DCL_CONSTANT_BUFFER).
+		bool allowRefactoring;
+		bool fp64;
+		bool earlyDepth;
+		bool enableBuffers;
+		bool skipOptimization;
+		bool enableMinPrecision;
+		bool enableDoubleExtensions;
+		bool enableShaderExtensions;
+
+		// Decoded for SYNC.
+		bool threadsInGroup;
+		bool sharedMemory;
+		bool uavGroup;
+		bool uavGlobal;
+
+		// Decoded for every opcode without a dedicated case above.
+		DxbcResourceReturnType::Enum retType;
+		bool saturate;
+		uint8_t testNZ;
+
+		// Decoded from extended opcode tokens.
+		uint8_t sampleOffsets[3];
+		uint8_t resourceTarget;
+		uint8_t resourceStride;
+		DxbcResourceReturnType::Enum resourceReturnTypes[4];
+
+		DxbcOperand operand[6];
+	};
+
+	int32_t read(bx::ReaderI* _reader, DxbcInstruction& _instruction); // Decodes one instruction; returns the accumulated size of what was read.
+	int32_t write(bx::WriterI* _writer, const DxbcInstruction& _instruction); // Encodes the opcode token, operands and raw values; returns the accumulated write size.
+	int32_t toString(char* _out, int32_t _size, const DxbcInstruction& _instruction); // Formats the instruction as text into _out (capacity _size).
+
+	struct DxbcSignature // Contents of an input or output signature chunk.
+	{
+		struct Element
+		{
+			stl::string name; // Serialized as an offset into a string table shared by all elements.
+			uint32_t semanticIndex;
+			DxbcBuiltin::Enum valueType;
+			DxbcComponentType::Enum componentType;
+			uint32_t registerIndex;
+			uint8_t mask;
+			uint8_t readWriteMask;
+			uint8_t stream; // Followed by one padding byte in the serialized record.
+		};
+
+		uint32_t key; // Second dword of the serialized signature; read and written back verbatim.
+		stl::vector<Element> elements;
+	};
+
+	int32_t read(bx::ReaderSeekerI* _reader, DxbcSignature& _signature); // Appends the elements it reads to _signature.elements.
+	int32_t write(bx::WriterI* _writer, const DxbcSignature& _signature); // Writes count, key, element records and the name string table.
+
+	struct DxbcShader // Contents of a shader chunk.
+	{
+		uint32_t version; // First dword of the chunk payload; read and written back verbatim.
+		stl::vector<uint8_t> byteCode; // Instruction tokens that follow the version and length dwords.
+	};
+
+	int32_t read(bx::ReaderSeekerI* _reader, DxbcShader& _shader); // Reads version, token count and byte code.
+	int32_t write(bx::WriterI* _writer, const DxbcShader& _shader); // Writes version, token count (byte code tokens + 2) and byte code.
+
+	typedef void (*DxbcParseFn)(uint32_t _offset, const DxbcInstruction& _instruction, void* _userData); // _offset is the instruction's byte offset inside the byte code.
+	void parse(const DxbcShader& _src, DxbcParseFn _fn, void* _userData); // Calls _fn for every instruction decoded from _src.
+
+	typedef void (*DxbcFilterFn)(DxbcInstruction& _instruction, void* _userData); // May modify the instruction before it is re-encoded.
+	void filter(DxbcShader& _dst, const DxbcShader& _src, DxbcFilterFn _fn, void* _userData); // Decodes _src, applies _fn to each instruction and stores the re-encoded result in _dst.
+
+	struct DxbcContext // A DXBC container: header plus the chunks this code decodes.
+	{
+		struct Header // Read as one block by read(); the chunk offset table follows it directly.
+		{
+			uint32_t magic; // write() emits the 'DXBC' fourcc here.
+			uint8_t  hash[16]; // write() emits 16 zero bytes here.
+			uint32_t version; // write() emits 1.
+			uint32_t size; // write() patches in the total number of bytes written.
+			uint32_t numChunks; // Number of entries in the chunk offset table.
+		};
+
+		Header header;
+		DxbcSignature inputSignature;
+		DxbcSignature outputSignature;
+		DxbcShader shader;
+	};
+
+	int32_t read(bx::ReaderSeekerI* _reader, DxbcContext& _dxbc); // Reads the header and walks the chunk table, decoding shader and signature chunks.
+	int32_t write(bx::WriterSeekerI* _writer, const DxbcContext& _dxbc); // Writes a container with input signature, output signature and shader chunks.
+
+	/// Calculate DXBC hash from data.
+	void dxbcHash(const void* _data, uint32_t _size, void* _digest);
+
+} // namespace bgfx
+
+#endif // BGFX_SHADER_DXBC_H

+ 732 - 0
src/shader_spirv.cpp

@@ -0,0 +1,732 @@
+/*
+ * Copyright 2011-2015 Branimir Karadzic. All rights reserved.
+ * License: http://www.opensource.org/licenses/BSD-2-Clause
+ */
+
+#include "bgfx_p.h"
+#include "shader_spirv.h"
+
+namespace bgfx
+{
+	struct SpirvOpcodeInfo // Per-opcode decoding description used by read(bx::ReaderI*, SpirvInstruction&).
+	{
+		uint8_t numOperands; // Operands decoded through read(bx::ReaderI*, SpirvOperand&) when hasVariable is false.
+		uint8_t numValues;   // 32-bit words copied verbatim into SpirvInstruction::un.value.
+		bool    hasVariable; // true: the words after the fixed values are skipped up to the instruction length.
+	};
+
+	static const SpirvOpcodeInfo s_sprivOpcodeInfo[] = // Indexed by SpirvOpcode::Enum; each entry is { numOperands, numValues, hasVariable }.
+	{
+		{ 0, 0, false }, // Nop,
+		{ 0, 0, true  }, // Source
+		{ 0, 0, true  }, // SourceExtension
+		{ 0, 0, false }, // Extension
+		{ 0, 1, true  }, // ExtInstImport
+		{ 0, 2, false }, // MemoryModel
+		{ 0, 2, false }, // EntryPoint
+		{ 0, 0, false }, // ExecutionMode
+		{ 0, 1, false }, // TypeVoid
+		{ 0, 1, false }, // TypeBool
+		{ 0, 3, false }, // TypeInt
+		{ 0, 2, false }, // TypeFloat
+		{ 0, 3, false }, // TypeVector
+		{ 0, 3, false }, // TypeMatrix
+		{ 1, 7, false }, // TypeSampler
+		{ 0, 0, false }, // TypeFilter
+		{ 0, 0, false }, // TypeArray
+		{ 0, 0, false }, // TypeRuntimeArray
+		{ 0, 0, false }, // TypeStruct
+		{ 0, 0, false }, // TypeOpaque
+		{ 0, 3, false }, // TypePointer
+		{ 0, 2, true  }, // TypeFunction
+		{ 0, 0, false }, // TypeEvent
+		{ 0, 0, false }, // TypeDeviceEvent
+		{ 0, 0, false }, // TypeReserveId
+		{ 0, 0, false }, // TypeQueue
+		{ 0, 0, false }, // TypePipe
+		{ 0, 0, false }, // ConstantTrue
+		{ 0, 0, false }, // ConstantFalse
+		{ 0, 2, true  }, // Constant
+		{ 0, 2, true  }, // ConstantComposite
+		{ 0, 0, false }, // ConstantSampler
+		{ 0, 0, false }, // ConstantNullPointer
+		{ 0, 0, false }, // ConstantNullObject
+		{ 0, 0, false }, // SpecConstantTrue
+		{ 0, 0, false }, // SpecConstantFalse
+		{ 0, 0, false }, // SpecConstant
+		{ 0, 0, false }, // SpecConstantComposite
+		{ 0, 3, true  }, // Variable
+		{ 0, 0, false }, // VariableArray
+		{ 0, 4, false }, // Function
+		{ 0, 0, false }, // FunctionParameter
+		{ 0, 0, false }, // FunctionEnd
+		{ 0, 0, false }, // FunctionCall
+		{ 0, 0, false }, // ExtInst
+		{ 0, 0, false }, // Undef
+		{ 0, 0, false }, // Load
+		{ 0, 2, true  }, // Store
+		{ 0, 0, false }, // Phi
+		{ 0, 0, false }, // DecorationGroup
+		{ 0, 2, true  }, // Decorate
+		{ 0, 0, false }, // MemberDecorate
+		{ 0, 0, false }, // GroupDecorate
+		{ 0, 0, false }, // GroupMemberDecorate
+		{ 0, 1, true  }, // Name
+		{ 0, 1, true  }, // MemberName
+		{ 0, 0, false }, // String
+		{ 0, 0, false }, // Line
+		{ 0, 0, false }, // VectorExtractDynamic
+		{ 0, 0, false }, // VectorInsertDynamic
+		{ 0, 0, false }, // VectorShuffle
+		{ 0, 0, false }, // CompositeConstruct
+		{ 0, 0, false }, // CompositeExtract
+		{ 0, 0, false }, // CompositeInsert
+		{ 0, 0, false }, // CopyObject
+		{ 0, 0, false }, // CopyMemory
+		{ 0, 0, false }, // CopyMemorySized
+		{ 0, 0, false }, // Sampler
+		{ 0, 0, false }, // TextureSample
+		{ 0, 0, false }, // TextureSampleDref
+		{ 0, 0, false }, // TextureSampleLod
+		{ 0, 0, false }, // TextureSampleProj
+		{ 0, 0, false }, // TextureSampleGrad
+		{ 0, 0, false }, // TextureSampleOffset
+		{ 0, 0, false }, // TextureSampleProjLod
+		{ 0, 0, false }, // TextureSampleProjGrad
+		{ 0, 0, false }, // TextureSampleLodOffset
+		{ 0, 0, false }, // TextureSampleProjOffset
+		{ 0, 0, false }, // TextureSampleGradOffset
+		{ 0, 0, false }, // TextureSampleProjLodOffset
+		{ 0, 0, false }, // TextureSampleProjGradOffset
+		{ 0, 0, false }, // TextureFetchTexelLod
+		{ 0, 0, false }, // TextureFetchTexelOffset
+		{ 0, 0, false }, // TextureFetchSample
+		{ 0, 0, false }, // TextureFetchTexel
+		{ 0, 0, false }, // TextureGather
+		{ 0, 0, false }, // TextureGatherOffset
+		{ 0, 0, false }, // TextureGatherOffsets
+		{ 0, 0, false }, // TextureQuerySizeLod
+		{ 0, 0, false }, // TextureQuerySize
+		{ 0, 0, false }, // TextureQueryLod
+		{ 0, 0, false }, // TextureQueryLevels
+		{ 0, 0, false }, // TextureQuerySamples
+		{ 0, 0, false }, // AccessChain
+		{ 0, 0, false }, // InBoundsAccessChain
+		{ 0, 0, false }, // SNegate
+		{ 0, 0, false }, // FNegate
+		{ 0, 0, false }, // Not
+		{ 0, 0, false }, // Any
+		{ 0, 0, false }, // All
+		{ 0, 0, false }, // ConvertFToU
+		{ 0, 0, false }, // ConvertFToS
+		{ 0, 0, false }, // ConvertSToF
+		{ 0, 0, false }, // ConvertUToF
+		{ 0, 0, false }, // UConvert
+		{ 0, 0, false }, // SConvert
+		{ 0, 0, false }, // FConvert
+		{ 0, 0, false }, // ConvertPtrToU
+		{ 0, 0, false }, // ConvertUToPtr
+		{ 0, 0, false }, // PtrCastToGeneric
+		{ 0, 0, false }, // GenericCastToPtr
+		{ 0, 0, false }, // Bitcast
+		{ 0, 0, false }, // Transpose
+		{ 0, 0, false }, // IsNan
+		{ 0, 0, false }, // IsInf
+		{ 0, 0, false }, // IsFinite
+		{ 0, 0, false }, // IsNormal
+		{ 0, 0, false }, // SignBitSet
+		{ 0, 0, false }, // LessOrGreater
+		{ 0, 0, false }, // Ordered
+		{ 0, 0, false }, // Unordered
+		{ 0, 0, false }, // ArrayLength
+		{ 0, 0, false }, // IAdd
+		{ 0, 0, false }, // FAdd
+		{ 0, 0, false }, // ISub
+		{ 0, 0, false }, // FSub
+		{ 0, 0, false }, // IMul
+		{ 0, 0, false }, // FMul
+		{ 0, 0, false }, // UDiv
+		{ 0, 0, false }, // SDiv
+		{ 0, 0, false }, // FDiv
+		{ 0, 0, false }, // UMod
+		{ 0, 0, false }, // SRem
+		{ 0, 0, false }, // SMod
+		{ 0, 0, false }, // FRem
+		{ 0, 0, false }, // FMod
+		{ 0, 0, false }, // VectorTimesScalar
+		{ 0, 0, false }, // MatrixTimesScalar
+		{ 0, 0, false }, // VectorTimesMatrix
+		{ 0, 0, false }, // MatrixTimesVector
+		{ 0, 0, false }, // MatrixTimesMatrix
+		{ 0, 0, false }, // OuterProduct
+		{ 0, 0, false }, // Dot
+		{ 0, 0, false }, // ShiftRightLogical
+		{ 0, 0, false }, // ShiftRightArithmetic
+		{ 0, 0, false }, // ShiftLeftLogical
+		{ 0, 0, false }, // LogicalOr
+		{ 0, 0, false }, // LogicalXor
+		{ 0, 0, false }, // LogicalAnd
+		{ 0, 0, false }, // BitwiseOr
+		{ 0, 0, false }, // BitwiseXor
+		{ 0, 0, false }, // BitwiseAnd
+		{ 0, 0, false }, // Select
+		{ 0, 0, false }, // IEqual
+		{ 0, 0, false }, // FOrdEqual
+		{ 0, 0, false }, // FUnordEqual
+		{ 0, 0, false }, // INotEqual
+		{ 0, 0, false }, // FOrdNotEqual
+		{ 0, 0, false }, // FUnordNotEqual
+		{ 0, 0, false }, // ULessThan
+		{ 0, 0, false }, // SLessThan
+		{ 0, 0, false }, // FOrdLessThan
+		{ 0, 0, false }, // FUnordLessThan
+		{ 0, 0, false }, // UGreaterThan
+		{ 0, 0, false }, // SGreaterThan
+		{ 0, 0, false }, // FOrdGreaterThan
+		{ 0, 0, false }, // FUnordGreaterThan
+		{ 0, 0, false }, // ULessThanEqual
+		{ 0, 0, false }, // SLessThanEqual
+		{ 0, 0, false }, // FOrdLessThanEqual
+		{ 0, 0, false }, // FUnordLessThanEqual
+		{ 0, 0, false }, // UGreaterThanEqual
+		{ 0, 0, false }, // SGreaterThanEqual
+		{ 0, 0, false }, // FOrdGreaterThanEqual
+		{ 0, 0, false }, // FUnordGreaterThanEqual
+		{ 0, 0, false }, // DPdx
+		{ 0, 0, false }, // DPdy
+		{ 0, 0, false }, // Fwidth
+		{ 0, 0, false }, // DPdxFine
+		{ 0, 0, false }, // DPdyFine
+		{ 0, 0, false }, // FwidthFine
+		{ 0, 0, false }, // DPdxCoarse
+		{ 0, 0, false }, // DPdyCoarse
+		{ 0, 0, false }, // FwidthCoarse
+		{ 0, 0, false }, // EmitVertex
+		{ 0, 0, false }, // EndPrimitive
+		{ 0, 0, false }, // EmitStreamVertex
+		{ 0, 0, false }, // EndStreamPrimitive
+		{ 0, 0, false }, // ControlBarrier
+		{ 0, 0, false }, // MemoryBarrier
+		{ 0, 0, false }, // ImagePointer
+		{ 0, 0, false }, // AtomicInit
+		{ 0, 0, false }, // AtomicLoad
+		{ 0, 0, false }, // AtomicStore
+		{ 0, 0, false }, // AtomicExchange
+		{ 0, 0, false }, // AtomicCompareExchange
+		{ 0, 0, false }, // AtomicCompareExchangeWeak
+		{ 0, 0, false }, // AtomicIIncrement
+		{ 0, 0, false }, // AtomicIDecrement
+		{ 0, 0, false }, // AtomicIAdd
+		{ 0, 0, false }, // AtomicISub
+		{ 0, 0, false }, // AtomicUMin
+		{ 0, 0, false }, // AtomicUMax
+		{ 0, 0, false }, // AtomicAnd
+		{ 0, 0, false }, // AtomicOr
+		{ 0, 0, false }, // AtomicXor
+		{ 0, 0, false }, // LoopMerge
+		{ 0, 0, false }, // SelectionMerge
+		{ 0, 1, false }, // Label
+		{ 0, 1, false }, // Branch
+		{ 0, 0, false }, // BranchConditional
+		{ 0, 0, false }, // Switch
+		{ 0, 0, false }, // Kill
+		{ 0, 0, false }, // Return
+		{ 0, 0, false }, // ReturnValue
+		{ 0, 0, false }, // Unreachable
+		{ 0, 0, false }, // LifetimeStart
+		{ 0, 0, false }, // LifetimeStop
+		{ 0, 0, false }, // CompileFlag
+		{ 0, 0, false }, // AsyncGroupCopy
+		{ 0, 0, false }, // WaitGroupEvents
+		{ 0, 0, false }, // GroupAll
+		{ 0, 0, false }, // GroupAny
+		{ 0, 0, false }, // GroupBroadcast
+		{ 0, 0, false }, // GroupIAdd
+		{ 0, 0, false }, // GroupFAdd
+		{ 0, 0, false }, // GroupFMin
+		{ 0, 0, false }, // GroupUMin
+		{ 0, 0, false }, // GroupSMin
+		{ 0, 0, false }, // GroupFMax
+		{ 0, 0, false }, // GroupUMax
+		{ 0, 0, false }, // GroupSMax
+		{ 0, 0, false }, // GenericCastToPtrExplicit
+		{ 0, 0, false }, // GenericPtrMemSemantics
+		{ 0, 0, false }, // ReadPipe
+		{ 0, 0, false }, // WritePipe
+		{ 0, 0, false }, // ReservedReadPipe
+		{ 0, 0, false }, // ReservedWritePipe
+		{ 0, 0, false }, // ReserveReadPipePackets
+		{ 0, 0, false }, // ReserveWritePipePackets
+		{ 0, 0, false }, // CommitReadPipe
+		{ 0, 0, false }, // CommitWritePipe
+		{ 0, 0, false }, // IsValidReserveId
+		{ 0, 0, false }, // GetNumPipePackets
+		{ 0, 0, false }, // GetMaxPipePackets
+		{ 0, 0, false }, // GroupReserveReadPipePackets
+		{ 0, 0, false }, // GroupReserveWritePipePackets
+		{ 0, 0, false }, // GroupCommitReadPipe
+		{ 0, 0, false }, // GroupCommitWritePipe
+		{ 0, 0, false }, // EnqueueMarker
+		{ 0, 0, false }, // EnqueueKernel
+		{ 0, 0, false }, // GetKernelNDrangeSubGroupCount
+		{ 0, 0, false }, // GetKernelNDrangeMaxSubGroupSize
+		{ 0, 0, false }, // GetKernelWorkGroupSize
+		{ 0, 0, false }, // GetKernelPreferredWorkGroupSizeMultiple
+		{ 0, 0, false }, // RetainEvent
+		{ 0, 0, false }, // ReleaseEvent
+		{ 0, 0, false }, // CreateUserEvent
+		{ 0, 0, false }, // IsValidEvent
+		{ 0, 0, false }, // SetUserEventStatus
+		{ 0, 0, false }, // CaptureEventProfilingInfo
+		{ 0, 0, false }, // GetDefaultQueue
+		{ 0, 0, false }, // BuildNDRange
+		{ 0, 0, false }, // SatConvertSToU
+		{ 0, 0, false }, // SatConvertUToS
+		{ 0, 0, false }, // AtomicIMin
+		{ 0, 0, false }, // AtomicIMax
+	};
+	BX_STATIC_ASSERT(BX_COUNTOF(s_sprivOpcodeInfo) == SpirvOpcode::Count); // Keeps the table in lockstep with the enum.
+
+	/// Printable opcode names, indexed by SpirvOpcode::Enum.
+	///
+	/// File-local (`static`), like s_sprivOpcodeInfo: the table is only reached
+	/// through getName() and should not be exported as a global symbol.
+	static const char* s_spirvOpcode[] =
+	{
+		"Nop",
+		"Source",
+		"SourceExtension",
+		"Extension",
+		"ExtInstImport",
+		"MemoryModel",
+		"EntryPoint",
+		"ExecutionMode",
+		"TypeVoid",
+		"TypeBool",
+		"TypeInt",
+		"TypeFloat",
+		"TypeVector",
+		"TypeMatrix",
+		"TypeSampler",
+		"TypeFilter",
+		"TypeArray",
+		"TypeRuntimeArray",
+		"TypeStruct",
+		"TypeOpaque",
+		"TypePointer",
+		"TypeFunction",
+		"TypeEvent",
+		"TypeDeviceEvent",
+		"TypeReserveId",
+		"TypeQueue",
+		"TypePipe",
+		"ConstantTrue",
+		"ConstantFalse",
+		"Constant",
+		"ConstantComposite",
+		"ConstantSampler",
+		"ConstantNullPointer",
+		"ConstantNullObject",
+		"SpecConstantTrue",
+		"SpecConstantFalse",
+		"SpecConstant",
+		"SpecConstantComposite",
+		"Variable",
+		"VariableArray",
+		"Function",
+		"FunctionParameter",
+		"FunctionEnd",
+		"FunctionCall",
+		"ExtInst",
+		"Undef",
+		"Load",
+		"Store",
+		"Phi",
+		"DecorationGroup",
+		"Decorate",
+		"MemberDecorate",
+		"GroupDecorate",
+		"GroupMemberDecorate",
+		"Name",
+		"MemberName",
+		"String",
+		"Line",
+		"VectorExtractDynamic",
+		"VectorInsertDynamic",
+		"VectorShuffle",
+		"CompositeConstruct",
+		"CompositeExtract",
+		"CompositeInsert",
+		"CopyObject",
+		"CopyMemory",
+		"CopyMemorySized",
+		"Sampler",
+		"TextureSample",
+		"TextureSampleDref",
+		"TextureSampleLod",
+		"TextureSampleProj",
+		"TextureSampleGrad",
+		"TextureSampleOffset",
+		"TextureSampleProjLod",
+		"TextureSampleProjGrad",
+		"TextureSampleLodOffset",
+		"TextureSampleProjOffset",
+		"TextureSampleGradOffset",
+		"TextureSampleProjLodOffset",
+		"TextureSampleProjGradOffset",
+		"TextureFetchTexelLod",
+		"TextureFetchTexelOffset",
+		"TextureFetchSample",
+		"TextureFetchTexel",
+		"TextureGather",
+		"TextureGatherOffset",
+		"TextureGatherOffsets",
+		"TextureQuerySizeLod",
+		"TextureQuerySize",
+		"TextureQueryLod",
+		"TextureQueryLevels",
+		"TextureQuerySamples",
+		"AccessChain",
+		"InBoundsAccessChain",
+		"SNegate",
+		"FNegate",
+		"Not",
+		"Any",
+		"All",
+		"ConvertFToU",
+		"ConvertFToS",
+		"ConvertSToF",
+		"ConvertUToF",
+		"UConvert",
+		"SConvert",
+		"FConvert",
+		"ConvertPtrToU",
+		"ConvertUToPtr",
+		"PtrCastToGeneric",
+		"GenericCastToPtr",
+		"Bitcast",
+		"Transpose",
+		"IsNan",
+		"IsInf",
+		"IsFinite",
+		"IsNormal",
+		"SignBitSet",
+		"LessOrGreater",
+		"Ordered",
+		"Unordered",
+		"ArrayLength",
+		"IAdd",
+		"FAdd",
+		"ISub",
+		"FSub",
+		"IMul",
+		"FMul",
+		"UDiv",
+		"SDiv",
+		"FDiv",
+		"UMod",
+		"SRem",
+		"SMod",
+		"FRem",
+		"FMod",
+		"VectorTimesScalar",
+		"MatrixTimesScalar",
+		"VectorTimesMatrix",
+		"MatrixTimesVector",
+		"MatrixTimesMatrix",
+		"OuterProduct",
+		"Dot",
+		"ShiftRightLogical",
+		"ShiftRightArithmetic",
+		"ShiftLeftLogical",
+		"LogicalOr",
+		"LogicalXor",
+		"LogicalAnd",
+		"BitwiseOr",
+		"BitwiseXor",
+		"BitwiseAnd",
+		"Select",
+		"IEqual",
+		"FOrdEqual",
+		"FUnordEqual",
+		"INotEqual",
+		"FOrdNotEqual",
+		"FUnordNotEqual",
+		"ULessThan",
+		"SLessThan",
+		"FOrdLessThan",
+		"FUnordLessThan",
+		"UGreaterThan",
+		"SGreaterThan",
+		"FOrdGreaterThan",
+		"FUnordGreaterThan",
+		"ULessThanEqual",
+		"SLessThanEqual",
+		"FOrdLessThanEqual",
+		"FUnordLessThanEqual",
+		"UGreaterThanEqual",
+		"SGreaterThanEqual",
+		"FOrdGreaterThanEqual",
+		"FUnordGreaterThanEqual",
+		"DPdx",
+		"DPdy",
+		"Fwidth",
+		"DPdxFine",
+		"DPdyFine",
+		"FwidthFine",
+		"DPdxCoarse",
+		"DPdyCoarse",
+		"FwidthCoarse",
+		"EmitVertex",
+		"EndPrimitive",
+		"EmitStreamVertex",
+		"EndStreamPrimitive",
+		"ControlBarrier",
+		"MemoryBarrier",
+		"ImagePointer",
+		"AtomicInit",
+		"AtomicLoad",
+		"AtomicStore",
+		"AtomicExchange",
+		"AtomicCompareExchange",
+		"AtomicCompareExchangeWeak",
+		"AtomicIIncrement",
+		"AtomicIDecrement",
+		"AtomicIAdd",
+		"AtomicISub",
+		"AtomicUMin",
+		"AtomicUMax",
+		"AtomicAnd",
+		"AtomicOr",
+		"AtomicXor",
+		"LoopMerge",
+		"SelectionMerge",
+		"Label",
+		"Branch",
+		"BranchConditional",
+		"Switch",
+		"Kill",
+		"Return",
+		"ReturnValue",
+		"Unreachable",
+		"LifetimeStart",
+		"LifetimeStop",
+		"CompileFlag",
+		"AsyncGroupCopy",
+		"WaitGroupEvents",
+		"GroupAll",
+		"GroupAny",
+		"GroupBroadcast",
+		"GroupIAdd",
+		"GroupFAdd",
+		"GroupFMin",
+		"GroupUMin",
+		"GroupSMin",
+		"GroupFMax",
+		"GroupUMax",
+		"GroupSMax",
+		"GenericCastToPtrExplicit",
+		"GenericPtrMemSemantics",
+		"ReadPipe",
+		"WritePipe",
+		"ReservedReadPipe",
+		"ReservedWritePipe",
+		"ReserveReadPipePackets",
+		"ReserveWritePipePackets",
+		"CommitReadPipe",
+		"CommitWritePipe",
+		"IsValidReserveId",
+		"GetNumPipePackets",
+		"GetMaxPipePackets",
+		"GroupReserveReadPipePackets",
+		"GroupReserveWritePipePackets",
+		"GroupCommitReadPipe",
+		"GroupCommitWritePipe",
+		"EnqueueMarker",
+		"EnqueueKernel",
+		"GetKernelNDrangeSubGroupCount",
+		"GetKernelNDrangeMaxSubGroupSize",
+		"GetKernelWorkGroupSize",
+		"GetKernelPreferredWorkGroupSizeMultiple",
+		"RetainEvent",
+		"ReleaseEvent",
+		"CreateUserEvent",
+		"IsValidEvent",
+		"SetUserEventStatus",
+		"CaptureEventProfilingInfo",
+		"GetDefaultQueue",
+		"BuildNDRange",
+		"SatConvertSToU",
+		"SatConvertUToS",
+		"AtomicIMin",
+		"AtomicIMax",
+	};
+	// Keeps the name table in lockstep with the enum.
+	BX_STATIC_ASSERT(BX_COUNTOF(s_spirvOpcode) == SpirvOpcode::Count);
+
+	/// Return the printable name of `_opcode`.
+	///
+	/// Opcodes ultimately come from shader byte code, so an out-of-range value
+	/// is possible. BX_CHECK still reports it, and instead of indexing past the
+	/// end of the name table a placeholder string is returned.
+	///
+	/// @param _opcode Opcode to look up.
+	/// @returns Name from s_spirvOpcode, or "Unknown" if `_opcode` is not below
+	///   SpirvOpcode::Count.
+	const char* getName(SpirvOpcode::Enum _opcode)
+	{
+		BX_CHECK(_opcode < SpirvOpcode::Count, "Unknown opcode id %d.", _opcode);
+
+		// Compare as unsigned so a negative enum value is rejected as well.
+		if (uint32_t(_opcode) >= uint32_t(SpirvOpcode::Count) )
+		{
+			return "Unknown";
+		}
+
+		return s_spirvOpcode[_opcode];
+	}
+
+	/// Consume one 32-bit operand word from `_reader`.
+	///
+	/// SpirvOperand carries no data yet, so the word is read and dropped.
+	///
+	/// @returns Whatever bx::read reports for the single word.
+	int32_t read(bx::ReaderI* _reader, SpirvOperand& _operand)
+	{
+		BX_UNUSED(_operand);
+
+		uint32_t word;
+		const int32_t consumed = bx::read(_reader, word);
+
+		return consumed;
+	}
+
+	/// Decode one SPIR-V instruction from `_reader`.
+	///
+	/// The first word carries the opcode in its low 16 bits and the instruction
+	/// length in its high 16 bits; the length is counted in 32-bit words and
+	/// includes that first word. The fixed leading values listed for the opcode
+	/// in s_sprivOpcodeInfo are stored in `_instruction.un.value`. Remaining
+	/// words are consumed and dropped so the reader ends up at the next
+	/// instruction.
+	///
+	/// @param[in] _reader Stream positioned at the first word of an instruction.
+	/// @param[out] _instruction Receives opcode, length, operand count and the
+	///   fixed values (unused value slots are zeroed).
+	/// @returns Number of bytes consumed from `_reader`.
+	int32_t read(bx::ReaderI* _reader, SpirvInstruction& _instruction)
+	{
+		int32_t size = 0;
+
+		// Stays 0 (Nop, length 0) when nothing can be read anymore.
+		uint32_t token = 0;
+		size += bx::read(_reader, token);
+
+		const uint32_t opcode = token & UINT32_C(0x0000ffff);
+		_instruction.opcode      = SpirvOpcode::Enum(opcode);
+		_instruction.length      = uint16_t( (token & UINT32_C(0xffff0000) ) >> 16);
+		_instruction.numOperands = 0;
+
+		// Clear the value words so consumers such as toString() never see
+		// indeterminate data for opcodes without fixed values.
+		for (uint32_t ii = 0; ii < BX_COUNTOF(_instruction.un.value); ++ii)
+		{
+			_instruction.un.value[ii] = 0;
+		}
+
+		// The opcode comes straight from the byte code: never index the table
+		// with a value it does not cover. An unknown opcode is skipped as a
+		// whole, using the length stored in the first word.
+		static const SpirvOpcodeInfo s_unknownOpcodeInfo = { 0, 0, true };
+		const bool known = opcode < uint32_t(SpirvOpcode::Count);
+		BX_WARN(known, "Unknown opcode id %d.", opcode);
+		const SpirvOpcodeInfo& info = known ? s_sprivOpcodeInfo[opcode] : s_unknownOpcodeInfo;
+
+		if (0 < info.numValues)
+		{
+			size += bx::read(_reader, _instruction.un.value, int32_t(info.numValues*sizeof(uint32_t) ) );
+		}
+
+		if (!info.hasVariable)
+		{
+			_instruction.numOperands = info.numOperands;
+
+			if (info.numOperands <= BX_COUNTOF(_instruction.operand) )
+			{
+				for (uint32_t ii = 0; ii < info.numOperands; ++ii)
+				{
+					size += read(_reader, _instruction.operand[ii]);
+				}
+			}
+			else
+			{
+				BX_WARN(false, "Instruction %s with invalid number of operands %d (numValues %d)."
+						, getName(_instruction.opcode)
+						, info.numOperands
+						, info.numValues
+						);
+			}
+
+			BX_WARN(size/4 == _instruction.length, "read %d, expected %d, %s"
+					, size/4
+					, _instruction.length
+					, getName(_instruction.opcode)
+					);
+		}
+
+		// Skip whatever is left of the instruction. Using `<` rather than `!=`
+		// and stopping when the reader yields nothing guarantees termination on
+		// truncated byte code and when the stored length is smaller than the
+		// number of words already consumed.
+		while (size/4 < _instruction.length)
+		{
+			uint32_t tmp;
+			const int32_t bytes = bx::read(_reader, tmp);
+
+			if (bytes <= 0)
+			{
+				break;
+			}
+
+			size += bytes;
+		}
+
+		return size;
+	}
+
+	/// Placeholder: writing a SPIR-V instruction is not implemented.
+	///
+	/// @returns Always 0; nothing is written to `_writer`.
+	int32_t write(bx::WriterI* _writer, const SpirvInstruction& _instruction)
+	{
+		BX_UNUSED(_writer, _instruction);
+		return 0;
+	}
+
+	/// Format `_instruction` as "<name> <numOperands> (<value0>, <value1>)".
+	///
+	/// @param _out Destination buffer.
+	/// @param _size Capacity of `_out`; a negative value is treated as 0.
+	/// @returns The value returned by bx::snprintf.
+	int32_t toString(char* _out, int32_t _size, const SpirvInstruction& _instruction)
+	{
+		const char* opcodeName = getName(_instruction.opcode);
+
+		return bx::snprintf(_out, bx::uint32_imax(0, _size)
+					, "%s %d (%d, %d)"
+					, opcodeName
+					, _instruction.numOperands
+					, _instruction.un.value[0]
+					, _instruction.un.value[1]
+					);
+	}
+
+	/// Load everything from the reader's current position to its end into
+	/// `_shader.byteCode`.
+	///
+	/// @returns The value returned by bx::read for the bulk read.
+	int32_t read(bx::ReaderSeekerI* _reader, SpirvShader& _shader)
+	{
+		// Number of bytes between the current position and the end of the stream.
+		const uint32_t remaining = uint32_t(bx::getSize(_reader) - bx::seek(_reader) );
+
+		_shader.byteCode.resize(remaining);
+
+		return bx::read(_reader, _shader.byteCode.data(), remaining);
+	}
+
+	/// Placeholder: writing SPIR-V byte code is not implemented.
+	///
+	/// @returns Always 0; nothing is written to `_writer`.
+	int32_t write(bx::WriterI* _writer, const SpirvShader& _shader)
+	{
+		BX_UNUSED(_writer, _shader);
+		return 0;
+	}
+
+// Value Spirv::Header::magic must have for the header to be accepted.
+#define SPIRV_MAGIC 0x07230203
+
+	/// Read a SPIR-V module: the fixed header followed by the byte code.
+	///
+	/// @returns Total bytes read on success. If the header is short or its
+	///   magic is wrong, the negated number of header bytes read so far.
+	int32_t read(bx::ReaderSeekerI* _reader, Spirv& _spirv)
+	{
+		const int32_t headerSize = bx::read(_reader, _spirv.header);
+
+		// The magic is only inspected when the whole header was read.
+		const bool valid = headerSize == sizeof(Spirv::Header)
+			&& _spirv.header.magic == SPIRV_MAGIC
+			;
+
+		if (!valid)
+		{
+			return -headerSize;
+		}
+
+		return headerSize + read(_reader, _spirv.shader);
+	}
+
+	/// Placeholder: writing a SPIR-V module is not implemented.
+	///
+	/// @returns Always 0; nothing is written to `_writer`.
+	int32_t write(bx::WriterSeekerI* _writer, const Spirv& _spirv)
+	{
+		BX_UNUSED(_writer, _spirv);
+		return 0;
+	}
+
+	/// Decode `_src` instruction by instruction and hand each one to `_fn`.
+	///
+	/// @param _src Shader whose byte code is walked.
+	/// @param _fn Called once per decoded instruction with the byte offset of
+	///   the instruction inside `_src.byteCode`, the instruction, and `_userData`.
+	/// @param _userData Opaque pointer forwarded to `_fn`.
+	///
+	/// Decoding stops early when not even one full word can be read anymore.
+	void parse(const SpirvShader& _src, SpirvParseFn _fn, void* _userData)
+	{
+		bx::MemoryReader reader(_src.byteCode.data(), uint32_t(_src.byteCode.size() ) );
+
+		for (uint32_t token = 0, numTokens = uint32_t(_src.byteCode.size() / sizeof(uint32_t) ); token < numTokens;)
+		{
+			SpirvInstruction instruction;
+			const int32_t size = read(&reader, instruction);
+
+			if (size < int32_t(sizeof(uint32_t) ) )
+			{
+				// Truncated input: nothing usable was decoded.
+				break;
+			}
+
+			BX_CHECK(size/4 == instruction.length, "read %d, expected %d, %s"
+					, size/4
+					, instruction.length
+					, getName(instruction.opcode)
+					);
+
+			_fn(uint32_t(token * sizeof(uint32_t) ), instruction, _userData);
+
+			// Advance by the words actually consumed: this keeps `token` in step
+			// with the reader and makes progress even if the length stored in
+			// the instruction is 0, which used to stall this loop forever.
+			token += uint32_t(size)/4;
+		}
+	}
+
+} // namespace bgfx

+ 520 - 0
src/shader_spirv.h

@@ -0,0 +1,520 @@
+/*
+ * Copyright 2011-2015 Branimir Karadzic. All rights reserved.
+ * License: http://www.opensource.org/licenses/BSD-2-Clause
+ */
+
+#ifndef BGFX_SHADER_SPIRV_H
+#define BGFX_SHADER_SPIRV_H
+
+#include <bx/readerwriter.h>
+
+namespace bgfx
+{
+	// Reference: https://www.khronos.org/registry/spir-v/specs/1.0/SPIRV.html
+
+	struct SpirvOpcode // Instruction opcodes; the struct only scopes the enum.
+	{
+		enum Enum // Implicitly numbered from 0 in declaration order. NOTE(review): verify the ordering against the SPIR-V revision being targeted.
+		{
+			Nop,
+			Source,
+			SourceExtension,
+			Extension,
+			ExtInstImport,
+			MemoryModel,
+			EntryPoint,
+			ExecutionMode,
+			TypeVoid,
+			TypeBool,
+			TypeInt,
+			TypeFloat,
+			TypeVector,
+			TypeMatrix,
+			TypeSampler,
+			TypeFilter,
+			TypeArray,
+			TypeRuntimeArray,
+			TypeStruct,
+			TypeOpaque,
+			TypePointer,
+			TypeFunction,
+			TypeEvent,
+			TypeDeviceEvent,
+			TypeReserveId,
+			TypeQueue,
+			TypePipe,
+			ConstantTrue,
+			ConstantFalse,
+			Constant,
+			ConstantComposite,
+			ConstantSampler,
+			ConstantNullPointer,
+			ConstantNullObject,
+			SpecConstantTrue,
+			SpecConstantFalse,
+			SpecConstant,
+			SpecConstantComposite,
+			Variable,
+			VariableArray,
+			Function,
+			FunctionParameter,
+			FunctionEnd,
+			FunctionCall,
+			ExtInst,
+			Undef,
+			Load,
+			Store,
+			Phi,
+			DecorationGroup,
+			Decorate,
+			MemberDecorate,
+			GroupDecorate,
+			GroupMemberDecorate,
+			Name,
+			MemberName,
+			String,
+			Line,
+			VectorExtractDynamic,
+			VectorInsertDynamic,
+			VectorShuffle,
+			CompositeConstruct,
+			CompositeExtract,
+			CompositeInsert,
+			CopyObject,
+			CopyMemory,
+			CopyMemorySized,
+			Sampler,
+			TextureSample,
+			TextureSampleDref,
+			TextureSampleLod,
+			TextureSampleProj,
+			TextureSampleGrad,
+			TextureSampleOffset,
+			TextureSampleProjLod,
+			TextureSampleProjGrad,
+			TextureSampleLodOffset,
+			TextureSampleProjOffset,
+			TextureSampleGradOffset,
+			TextureSampleProjLodOffset,
+			TextureSampleProjGradOffset,
+			TextureFetchTexelLod,
+			TextureFetchTexelOffset,
+			TextureFetchSample,
+			TextureFetchTexel,
+			TextureGather,
+			TextureGatherOffset,
+			TextureGatherOffsets,
+			TextureQuerySizeLod,
+			TextureQuerySize,
+			TextureQueryLod,
+			TextureQueryLevels,
+			TextureQuerySamples,
+			AccessChain,
+			InBoundsAccessChain,
+			SNegate,
+			FNegate,
+			Not,
+			Any,
+			All,
+			ConvertFToU,
+			ConvertFToS,
+			ConvertSToF,
+			ConvertUToF,
+			UConvert,
+			SConvert,
+			FConvert,
+			ConvertPtrToU,
+			ConvertUToPtr,
+			PtrCastToGeneric,
+			GenericCastToPtr,
+			Bitcast,
+			Transpose,
+			IsNan,
+			IsInf,
+			IsFinite,
+			IsNormal,
+			SignBitSet,
+			LessOrGreater,
+			Ordered,
+			Unordered,
+			ArrayLength,
+			IAdd,
+			FAdd,
+			ISub,
+			FSub,
+			IMul,
+			FMul,
+			UDiv,
+			SDiv,
+			FDiv,
+			UMod,
+			SRem,
+			SMod,
+			FRem,
+			FMod,
+			VectorTimesScalar,
+			MatrixTimesScalar,
+			VectorTimesMatrix,
+			MatrixTimesVector,
+			MatrixTimesMatrix,
+			OuterProduct,
+			Dot,
+			ShiftRightLogical,
+			ShiftRightArithmetic,
+			ShiftLeftLogical,
+			LogicalOr,
+			LogicalXor,
+			LogicalAnd,
+			BitwiseOr,
+			BitwiseXor,
+			BitwiseAnd,
+			Select,
+			IEqual,
+			FOrdEqual,
+			FUnordEqual,
+			INotEqual,
+			FOrdNotEqual,
+			FUnordNotEqual,
+			ULessThan,
+			SLessThan,
+			FOrdLessThan,
+			FUnordLessThan,
+			UGreaterThan,
+			SGreaterThan,
+			FOrdGreaterThan,
+			FUnordGreaterThan,
+			ULessThanEqual,
+			SLessThanEqual,
+			FOrdLessThanEqual,
+			FUnordLessThanEqual,
+			UGreaterThanEqual,
+			SGreaterThanEqual,
+			FOrdGreaterThanEqual,
+			FUnordGreaterThanEqual,
+			DPdx,
+			DPdy,
+			Fwidth,
+			DPdxFine,
+			DPdyFine,
+			FwidthFine,
+			DPdxCoarse,
+			DPdyCoarse,
+			FwidthCoarse,
+			EmitVertex,
+			EndPrimitive,
+			EmitStreamVertex,
+			EndStreamPrimitive,
+			ControlBarrier,
+			MemoryBarrier,
+			ImagePointer,
+			AtomicInit,
+			AtomicLoad,
+			AtomicStore,
+			AtomicExchange,
+			AtomicCompareExchange,
+			AtomicCompareExchangeWeak,
+			AtomicIIncrement,
+			AtomicIDecrement,
+			AtomicIAdd,
+			AtomicISub,
+			AtomicUMin,
+			AtomicUMax,
+			AtomicAnd,
+			AtomicOr,
+			AtomicXor,
+			LoopMerge,
+			SelectionMerge,
+			Label,
+			Branch,
+			BranchConditional,
+			Switch,
+			Kill,
+			Return,
+			ReturnValue,
+			Unreachable,
+			LifetimeStart,
+			LifetimeStop,
+			CompileFlag,
+			AsyncGroupCopy,
+			WaitGroupEvents,
+			GroupAll,
+			GroupAny,
+			GroupBroadcast,
+			GroupIAdd,
+			GroupFAdd,
+			GroupFMin,
+			GroupUMin,
+			GroupSMin,
+			GroupFMax,
+			GroupUMax,
+			GroupSMax,
+			GenericCastToPtrExplicit,
+			GenericPtrMemSemantics,
+			ReadPipe,
+			WritePipe,
+			ReservedReadPipe,
+			ReservedWritePipe,
+			ReserveReadPipePackets,
+			ReserveWritePipePackets,
+			CommitReadPipe,
+			CommitWritePipe,
+			IsValidReserveId,
+			GetNumPipePackets,
+			GetMaxPipePackets,
+			GroupReserveReadPipePackets,
+			GroupReserveWritePipePackets,
+			GroupCommitReadPipe,
+			GroupCommitWritePipe,
+			EnqueueMarker,
+			EnqueueKernel,
+			GetKernelNDrangeSubGroupCount,
+			GetKernelNDrangeMaxSubGroupSize,
+			GetKernelWorkGroupSize,
+			GetKernelPreferredWorkGroupSizeMultiple,
+			RetainEvent,
+			ReleaseEvent,
+			CreateUserEvent,
+			IsValidEvent,
+			SetUserEventStatus,
+			CaptureEventProfilingInfo,
+			GetDefaultQueue,
+			BuildNDRange,
+			SatConvertSToU,
+			SatConvertUToS,
+			AtomicIMin,
+			AtomicIMax,
+
+			Count // Number of opcodes; the tables in shader_spirv.cpp are static-asserted against it.
+		};
+	};
+
+	struct SpirvBuiltin // Built-in identifiers; the struct only scopes the enum. Not referenced by shader_spirv.cpp as shown.
+	{
+		enum Enum // Implicitly numbered from 0 in declaration order; there is no Count terminator.
+		{
+			Position,
+			PointSize,
+			ClipVertex,
+			ClipDistance,
+			CullDistance,
+			VertexId,
+			InstanceId,
+			BuiltInPrimitiveId,
+			InvocationId,
+			Layer,
+			ViewportIndex,
+			TessLevelOuter,
+			TessLevelInner,
+			TessCoord,
+			PatchVertices,
+			FragCoord,
+			PointCoord,
+			FrontFacing,
+			SampleId,
+			SamplePosition,
+			SampleMask,
+			FragColor,
+			FragDepth,
+			HelperInvocation,
+			NumWorkgroups,
+			WorkgroupSize,
+			WorkgroupId,
+			LocalInvocationId,
+			GlobalInvocationId,
+			LocalInvocationIndex,
+			WorkDim,
+			GlobalSize,
+			EnqueuedWorkgroupSize,
+			GlobalOffset,
+			GlobalLinearId,
+			WorkgroupLinearId,
+			SubgroupSize,
+			SubgroupMaxSize,
+			NumSubgroups,
+			NumEnqueuedSubgroups,
+			SubgroupId,
+			SubgroupLocalInvocationId,
+		};
+	};
+
+	struct SpirvExecutionModel // Execution model identifiers; the struct only scopes the enum. Not referenced by shader_spirv.cpp as shown.
+	{
+		enum Enum // Implicitly numbered from 0 in declaration order.
+		{
+			Vertex,
+			TessellationControl,
+			TessellationEvaluation,
+			Geometry,
+			Fragment,
+			GLCompute,
+			Kernel,
+
+			Count // Number of execution models listed above.
+		};
+	};
+
+	struct SpirvMemoryModel // Memory model identifiers; the struct only scopes the enum. Not referenced by shader_spirv.cpp as shown.
+	{
+		enum Enum // Implicitly numbered from 0 in declaration order.
+		{
+			Simple,
+			GLSL450,
+			OpenCL12,
+			OpenCL20,
+			OpenCL21,
+
+			Count // Number of memory models listed above.
+		};
+	};
+
+	struct SpirvStorageClass // Storage class identifiers; the struct only scopes the enum. Not referenced by shader_spirv.cpp as shown.
+	{
+		enum Enum // Implicitly numbered from 0 in declaration order; there is no Count terminator.
+		{
+			UniformConstant,
+			Input,
+			Uniform,
+			Output,
+			WorkgroupLocal,
+			WorkgroupGlobal,
+			PrivateGlobal,
+			Function,
+			Generic,
+			Private,
+			AtomicCounter,
+		};
+	};
+
+	struct SpirvResourceDim // Resource dimensionality identifiers; the struct only scopes the enum. Not referenced by shader_spirv.cpp as shown.
+	{
+		enum Enum // Implicitly numbered from 0 in declaration order; there is no Count terminator.
+		{
+			Texture1D,
+			Texture2D,
+			Texture3D,
+			TextureCube,
+			TextureRect,
+			Buffer,
+		};
+	};
+
+	struct SpirvDecoration // Decoration identifiers; the struct only scopes the enum. Not referenced by shader_spirv.cpp as shown.
+	{
+		enum Enum // Implicitly numbered from 0 in declaration order.
+		{
+			PrecisionLow,
+			PrecisionMedium,
+			PrecisionHigh,
+			Block,
+			BufferBlock,
+			RowMajor,
+			ColMajor,
+			GLSLShared,
+			GLSLStd140,
+			GLSLStd430,
+			GLSLPacked,
+			Smooth,
+			Noperspective,
+			Flat,
+			Patch,
+			Centroid,
+			Sample,
+			Invariant,
+			Restrict,
+			Aliased,
+			Volatile,
+			Constant,
+			Coherent,
+			Nonwritable,
+			Nonreadable,
+			Uniform,
+			NoStaticUse,
+			CPacked,
+			SaturatedConversion,
+			Stream,
+			Location,
+			Component,
+			Index,
+			Binding,
+			DescriptorSet,
+			Offset,
+			Alignment,
+			XfbBuffer,
+			Stride,
+			BuiltIn,
+			FuncParamAttr,
+			FPRoundingMode,
+			FPFastMathMode,
+			LinkageAttributes,
+			SpecId,
+
+			Count // Number of decorations listed above.
+		};
+	};
+
+	struct SpirvOperand // Placeholder without members; shader_spirv.cpp reads one 32-bit word per operand and discards it.
+	{
+	};
+
+	struct SpirvInstruction // One decoded instruction, filled in by read(bx::ReaderI*, SpirvInstruction&).
+	{
+		SpirvOpcode::Enum opcode; // Low 16 bits of the instruction's first word.
+		uint16_t length; // High 16 bits of the first word; read() compares it with the number of 32-bit words consumed.
+
+		uint8_t numOperands; // Operand count taken from the opcode's table entry in shader_spirv.cpp.
+		SpirvOperand operand[6];
+
+		union // Different views of the same leading value words.
+		{
+			struct ResultTypeId // Two consecutive words. NOTE(review): presumably result type id followed by result id - confirm against the spec.
+			{
+				uint32_t resultType;
+				uint32_t id;
+			};
+
+			ResultTypeId constant;
+			ResultTypeId constantComposite;
+
+			uint32_t value[8]; // Raw words; read() fills as many as the opcode's numValues.
+		} un;
+	};
+
+	int32_t read(bx::ReaderI* _reader, SpirvInstruction& _instruction); // Decodes one instruction; returns the number of bytes consumed.
+	int32_t write(bx::WriterI* _writer, const SpirvInstruction& _instruction); // Stub in shader_spirv.cpp: writes nothing and returns 0.
+	int32_t toString(char* _out, int32_t _size, const SpirvInstruction& _instruction); // Formats opcode name, operand count and the first two value words into _out.
+
+	struct SpirvShader // Instruction stream of a module, i.e. what follows Spirv::Header.
+	{
+		stl::vector<uint8_t> byteCode; // Raw bytes; parse() walks them as 32-bit words.
+	};
+
+	int32_t read(bx::ReaderSeekerI* _reader, SpirvShader& _shader); // Reads everything from the current position to the end of _reader into _shader.byteCode.
+	int32_t write(bx::WriterI* _writer, const SpirvShader& _shader); // Stub in shader_spirv.cpp: writes nothing and returns 0.
+
+	typedef void (*SpirvParseFn)(uint32_t _offset, const SpirvInstruction& _instruction, void* _userData); // _offset is the byte offset of the instruction inside the byte code.
+	void parse(const SpirvShader& _src, SpirvParseFn _fn, void* _userData); // Calls _fn once per decoded instruction of _src.
+
+	typedef void (*SpirvFilterFn)(SpirvInstruction& _instruction, void* _userData); // Callback type for filter().
+	void filter(SpirvShader& _dst, const SpirvShader& _src, SpirvFilterFn _fn, void* _userData); // NOTE(review): no definition appears in shader_spirv.cpp as shown - confirm before calling.
+
+	struct Spirv // A SPIR-V module: fixed header plus the instruction byte code.
+	{
+		struct Header // Read as one block from the start of the stream by read(bx::ReaderSeekerI*, Spirv&).
+		{
+			uint32_t magic; // Must equal SPIRV_MAGIC (0x07230203, see shader_spirv.cpp) or read() fails.
+			uint32_t version;
+			uint32_t generator;
+			uint32_t bound;
+			uint32_t schema;
+		};
+
+		Header header;
+		SpirvShader shader;
+	};
+
+	int32_t read(bx::ReaderSeekerI* _reader, Spirv& _spirv); // Returns bytes read, or the negated header byte count if the header is short or the magic is wrong.
+	int32_t write(bx::WriterSeekerI* _writer, const Spirv& _spirv); // Stub in shader_spirv.cpp: writes nothing and returns 0.
+
+} // namespace bgfx
+
+#endif // BGFX_SHADER_SPIRV_H