Răsfoiți Sursa

Initial compute support.

Branimir Karadžić 11 ani în urmă
părinte
comite
62b620a1a0
17 a modificat fișierele cu 1617 adăugiri și 440 ștergeri
  1. 22 1
      include/bgfx.h
  2. 2 0
      include/bgfxdefines.h
  3. 29 0
      premake/shader.mk
  4. 99 16
      src/bgfx.cpp
  5. 121 0
      src/bgfx_compute.sh
  6. 225 73
      src/bgfx_p.h
  7. 2 0
      src/config.h
  8. 16 2
      src/glcontext_egl.cpp
  9. 37 1
      src/glimports.h
  10. 4 2
      src/image.cpp
  11. 261 90
      src/renderer_d3d11.cpp
  12. 22 11
      src/renderer_d3d11.h
  13. 75 65
      src/renderer_d3d9.cpp
  14. 19 8
      src/renderer_d3d9.h
  15. 376 126
      src/renderer_gl.cpp
  16. 80 0
      src/renderer_gl.h
  17. 227 45
      tools/shaderc/shaderc.cpp

+ 22 - 1
include/bgfx.h

@@ -49,6 +49,18 @@ namespace bgfx
 		};
 		};
 	};
 	};
 
 
+	struct Access // Access mode requested for an image bound through setImage().
+	{
+		enum Enum
+		{
+			Read,      // Passed as `_access` to setImage(); stored as uint8_t in the compute binding.
+			Write,
+			ReadWrite,
+
+			Count      // Number of enumerators above.
+		};
+	};
+
 	struct Attrib
 	struct Attrib
 	{
 	{
 		enum Enum // corresponds to vertex shader attribute:
 		enum Enum // corresponds to vertex shader attribute:
@@ -1082,7 +1094,16 @@ namespace bgfx
 	///
 	///
 	uint32_t submitMask(uint32_t _viewMask, int32_t _depth = 0);
 	uint32_t submitMask(uint32_t _viewMask, int32_t _depth = 0);
 
 
-	/// Discard all previously set state for draw call.
+	/// Set compute image binding at `_stage` from mip level `_mip` of a texture, with the given format and access mode.
+	void setImage(uint8_t _stage, UniformHandle _sampler, TextureHandle _handle, uint8_t _mip, TextureFormat::Enum _format, Access::Enum _access);
+
+	/// Set compute image binding at `_stage` from attachment `_attachment` of a frame buffer, with the given format and access mode.
+	void setImage(uint8_t _stage, UniformHandle _sampler, FrameBufferHandle _handle, uint8_t _attachment, TextureFormat::Enum _format, Access::Enum _access);
+
+	/// Dispatch compute program `_handle` on view `_id` with `_numX`, `_numY`, `_numZ` counts (each defaults to 1).
+	void dispatch(uint8_t _id, ProgramHandle _handle, uint16_t _numX = 1, uint16_t _numY = 1, uint16_t _numZ = 1);
+
+	/// Discard all previously set state for draw or compute call.
 	void discard();
 	void discard();
 
 
 	/// Request screen shot.
 	/// Request screen shot.

+ 2 - 0
include/bgfxdefines.h

@@ -236,6 +236,7 @@
 #define BGFX_TEXTURE_COMPARE_ALWAYS      UINT32_C(0x00080000)
 #define BGFX_TEXTURE_COMPARE_ALWAYS      UINT32_C(0x00080000)
 #define BGFX_TEXTURE_COMPARE_SHIFT       16
 #define BGFX_TEXTURE_COMPARE_SHIFT       16
 #define BGFX_TEXTURE_COMPARE_MASK        UINT32_C(0x000f0000)
 #define BGFX_TEXTURE_COMPARE_MASK        UINT32_C(0x000f0000)
+#define BGFX_TEXTURE_COMPUTE_WRITE       UINT32_C(0x00100000)
 #define BGFX_TEXTURE_RESERVED_SHIFT      24
 #define BGFX_TEXTURE_RESERVED_SHIFT      24
 #define BGFX_TEXTURE_RESERVED_MASK       UINT32_C(0xff000000)
 #define BGFX_TEXTURE_RESERVED_MASK       UINT32_C(0xff000000)
 
 
@@ -295,6 +296,7 @@
 #define BGFX_CAPS_RENDERER_MULTITHREADED UINT64_C(0x0000000020000000)
 #define BGFX_CAPS_RENDERER_MULTITHREADED UINT64_C(0x0000000020000000)
 #define BGFX_CAPS_FRAGMENT_DEPTH         UINT64_C(0x0000000040000000)
 #define BGFX_CAPS_FRAGMENT_DEPTH         UINT64_C(0x0000000040000000)
 #define BGFX_CAPS_BLEND_INDEPENDENT      UINT64_C(0x0000000080000000)
 #define BGFX_CAPS_BLEND_INDEPENDENT      UINT64_C(0x0000000080000000)
+#define BGFX_CAPS_COMPUTE                UINT64_C(0x0000000100000000)
 
 
 #define BGFX_CAPS_TEXTURE_DEPTH_MASK (0 \
 #define BGFX_CAPS_TEXTURE_DEPTH_MASK (0 \
 			| BGFX_CAPS_TEXTURE_FORMAT_D16 \
 			| BGFX_CAPS_TEXTURE_FORMAT_D16 \

+ 29 - 0
premake/shader.mk

@@ -46,6 +46,7 @@ else
 ifeq ($(TARGET), 1)
 ifeq ($(TARGET), 1)
 VS_FLAGS=--platform windows -p vs_4_0 -O 3
 VS_FLAGS=--platform windows -p vs_4_0 -O 3
 FS_FLAGS=--platform windows -p ps_4_0 -O 3
 FS_FLAGS=--platform windows -p ps_4_0 -O 3
+CS_FLAGS=--platform windows -p cs_5_0 -O 3
 SHADER_PATH=shaders/dx11
 SHADER_PATH=shaders/dx11
 else
 else
 ifeq ($(TARGET), 2)
 ifeq ($(TARGET), 2)
@@ -56,11 +57,13 @@ else
 ifeq ($(TARGET), 3)
 ifeq ($(TARGET), 3)
 VS_FLAGS=--platform android
 VS_FLAGS=--platform android
 FS_FLAGS=--platform android
 FS_FLAGS=--platform android
+CS_FLAGS=--platform android
 SHADER_PATH=shaders/gles
 SHADER_PATH=shaders/gles
 else
 else
 ifeq ($(TARGET), 4)
 ifeq ($(TARGET), 4)
 VS_FLAGS=--platform linux -p 120
 VS_FLAGS=--platform linux -p 120
 FS_FLAGS=--platform linux -p 120
 FS_FLAGS=--platform linux -p 120
+CS_FLAGS=--platform linux -p 430
 SHADER_PATH=shaders/glsl
 SHADER_PATH=shaders/glsl
 endif
 endif
 endif
 endif
@@ -71,6 +74,7 @@ endif
 THISDIR := $(dir $(lastword $(MAKEFILE_LIST)))
 THISDIR := $(dir $(lastword $(MAKEFILE_LIST)))
 VS_FLAGS+=-i $(THISDIR)../src/
 VS_FLAGS+=-i $(THISDIR)../src/
 FS_FLAGS+=-i $(THISDIR)../src/
 FS_FLAGS+=-i $(THISDIR)../src/
+CS_FLAGS+=-i $(THISDIR)../src/
 
 
 BUILD_OUTPUT_DIR=$(addprefix ./, $(RUNTIME_DIR)/$(SHADER_PATH))
 BUILD_OUTPUT_DIR=$(addprefix ./, $(RUNTIME_DIR)/$(SHADER_PATH))
 BUILD_INTERMEDIATE_DIR=$(addprefix $(BUILD_DIR)/, $(SHADER_PATH))
 BUILD_INTERMEDIATE_DIR=$(addprefix $(BUILD_DIR)/, $(SHADER_PATH))
@@ -81,12 +85,31 @@ VS_DEPS=$(addprefix $(BUILD_INTERMEDIATE_DIR)/,$(addsuffix .bin.d, $(basename $(
 FS_SOURCES=$(wildcard fs_*.sc)
 FS_SOURCES=$(wildcard fs_*.sc)
 FS_DEPS=$(addprefix $(BUILD_INTERMEDIATE_DIR)/,$(addsuffix .bin.d, $(basename $(FS_SOURCES))))
 FS_DEPS=$(addprefix $(BUILD_INTERMEDIATE_DIR)/,$(addsuffix .bin.d, $(basename $(FS_SOURCES))))
 
 
+CS_SOURCES=$(wildcard cs_*.sc)
+CS_DEPS=$(addprefix $(BUILD_INTERMEDIATE_DIR)/,$(addsuffix .bin.d, $(basename $(CS_SOURCES))))
+
 VS_BIN = $(addprefix $(BUILD_INTERMEDIATE_DIR)/, $(addsuffix .bin, $(basename $(VS_SOURCES))))
 VS_BIN = $(addprefix $(BUILD_INTERMEDIATE_DIR)/, $(addsuffix .bin, $(basename $(VS_SOURCES))))
 FS_BIN = $(addprefix $(BUILD_INTERMEDIATE_DIR)/, $(addsuffix .bin, $(basename $(FS_SOURCES))))
 FS_BIN = $(addprefix $(BUILD_INTERMEDIATE_DIR)/, $(addsuffix .bin, $(basename $(FS_SOURCES))))
+CS_BIN = $(addprefix $(BUILD_INTERMEDIATE_DIR)/, $(addsuffix .bin, $(basename $(CS_SOURCES))))
 
 
 BIN = $(VS_BIN) $(FS_BIN)
 BIN = $(VS_BIN) $(FS_BIN)
 ASM = $(VS_ASM) $(FS_ASM)
 ASM = $(VS_ASM) $(FS_ASM)
 
 
+ifeq ($(TARGET), 1)
+BIN += $(CS_BIN)
+ASM += $(CS_ASM)
+else
+ifeq ($(TARGET), 3)
+BIN += $(CS_BIN)
+ASM += $(CS_ASM)
+else
+ifeq ($(TARGET), 4)
+BIN += $(CS_BIN)
+ASM += $(CS_ASM)
+endif
+endif
+endif
+
 $(BUILD_INTERMEDIATE_DIR)/vs_%.bin : vs_%.sc
 $(BUILD_INTERMEDIATE_DIR)/vs_%.bin : vs_%.sc
 	@echo [$(<)]
 	@echo [$(<)]
 	$(SILENT) $(SHADERC) $(VS_FLAGS) --type vertex --depends -o $(@) -f $(<) --disasm
 	$(SILENT) $(SHADERC) $(VS_FLAGS) --type vertex --depends -o $(@) -f $(<) --disasm
@@ -97,6 +120,11 @@ $(BUILD_INTERMEDIATE_DIR)/fs_%.bin : fs_%.sc
 	$(SILENT) $(SHADERC) $(FS_FLAGS) --type fragment --depends -o $(@) -f $(<) --disasm
 	$(SILENT) $(SHADERC) $(FS_FLAGS) --type fragment --depends -o $(@) -f $(<) --disasm
 	$(SILENT) cp $(@) $(BUILD_OUTPUT_DIR)/$(@F)
 	$(SILENT) cp $(@) $(BUILD_OUTPUT_DIR)/$(@F)
 
 
+$(BUILD_INTERMEDIATE_DIR)/cs_%.bin : cs_%.sc
+	@echo [$(<)]
+	$(SILENT) $(SHADERC) $(CS_FLAGS) --type compute --depends -o $(@) -f $(<) --disasm
+	$(SILENT) cp $(@) $(BUILD_OUTPUT_DIR)/$(@F)
+
 .PHONY: all
 .PHONY: all
 all: dirs $(BIN)
 all: dirs $(BIN)
 	@echo Target $(SHADER_PATH)
 	@echo Target $(SHADER_PATH)
@@ -119,3 +147,4 @@ endif # TARGET
 
 
 -include $(VS_DEPS)
 -include $(VS_DEPS)
 -include $(FS_DEPS)
 -include $(FS_DEPS)
+-include $(CS_DEPS)

+ 99 - 16
src/bgfx.cpp

@@ -644,12 +644,14 @@ namespace bgfx
 		}
 		}
 
 
 		if (BGFX_CONFIG_MAX_DRAW_CALLS-1 <= m_num
 		if (BGFX_CONFIG_MAX_DRAW_CALLS-1 <= m_num
-		|| (0 == m_state.m_numVertices && 0 == m_state.m_numIndices) )
+		|| (0 == m_draw.m_numVertices && 0 == m_draw.m_numIndices) )
 		{
 		{
 			++m_numDropped;
 			++m_numDropped;
 			return m_num;
 			return m_num;
 		}
 		}
 
 
+		m_constEnd = m_constantBuffer->getPos();
+
 		BX_WARN(invalidHandle != m_key.m_program, "Program with invalid handle");
 		BX_WARN(invalidHandle != m_key.m_program, "Program with invalid handle");
 		if (invalidHandle != m_key.m_program)
 		if (invalidHandle != m_key.m_program)
 		{
 		{
@@ -657,18 +659,20 @@ namespace bgfx
 			m_key.m_view = _id;
 			m_key.m_view = _id;
 			m_key.m_seq = s_ctx->m_seq[_id] & s_ctx->m_seqMask[_id];
 			m_key.m_seq = s_ctx->m_seq[_id] & s_ctx->m_seqMask[_id];
 			s_ctx->m_seq[_id]++;
 			s_ctx->m_seq[_id]++;
-			uint64_t key = m_key.encode();
+			uint64_t key = m_key.encodeDraw();
 			m_sortKeys[m_num] = key;
 			m_sortKeys[m_num] = key;
-			m_sortValues[m_num] = m_numRenderStates;
+			m_sortValues[m_num] = m_numRenderItems;
 			++m_num;
 			++m_num;
 
 
-			m_state.m_constEnd = m_constantBuffer->getPos();
-			m_state.m_flags |= m_flags;
-			m_renderState[m_numRenderStates] = m_state;
-			++m_numRenderStates;
+			m_draw.m_constBegin = m_constBegin;
+			m_draw.m_constEnd   = m_constEnd;
+			m_draw.m_flags |= m_flags;
+			m_renderItem[m_numRenderItems].draw = m_draw;
+			++m_numRenderItems;
 		}
 		}
 
 
-		m_state.clear();
+		m_draw.clear();
+		m_constBegin = m_constEnd;
 		m_flags = BGFX_STATE_NONE;
 		m_flags = BGFX_STATE_NONE;
 
 
 		return m_num;
 		return m_num;
@@ -683,12 +687,14 @@ namespace bgfx
 		}
 		}
 
 
 		if (BGFX_CONFIG_MAX_DRAW_CALLS-1 <= m_num
 		if (BGFX_CONFIG_MAX_DRAW_CALLS-1 <= m_num
-		|| (0 == m_state.m_numVertices && 0 == m_state.m_numIndices) )
+		|| (0 == m_draw.m_numVertices && 0 == m_draw.m_numIndices) )
 		{
 		{
 			m_numDropped += bx::uint32_cntbits(_viewMask);
 			m_numDropped += bx::uint32_cntbits(_viewMask);
 			return m_num;
 			return m_num;
 		}
 		}
 
 
+		m_constEnd = m_constantBuffer->getPos();
+
 		BX_WARN(invalidHandle != m_key.m_program, "Program with invalid handle");
 		BX_WARN(invalidHandle != m_key.m_program, "Program with invalid handle");
 		if (invalidHandle != m_key.m_program)
 		if (invalidHandle != m_key.m_program)
 		{
 		{
@@ -702,24 +708,69 @@ namespace bgfx
 				m_key.m_view = id;
 				m_key.m_view = id;
 				m_key.m_seq = s_ctx->m_seq[id] & s_ctx->m_seqMask[id];
 				m_key.m_seq = s_ctx->m_seq[id] & s_ctx->m_seqMask[id];
 				s_ctx->m_seq[id]++;
 				s_ctx->m_seq[id]++;
-				uint64_t key = m_key.encode();
+				uint64_t key = m_key.encodeDraw();
 				m_sortKeys[m_num] = key;
 				m_sortKeys[m_num] = key;
-				m_sortValues[m_num] = m_numRenderStates;
+				m_sortValues[m_num] = m_numRenderItems;
 				++m_num;
 				++m_num;
 			}
 			}
 
 
-			m_state.m_constEnd = m_constantBuffer->getPos();
-			m_state.m_flags |= m_flags;
-			m_renderState[m_numRenderStates] = m_state;
-			++m_numRenderStates;
+			m_draw.m_constBegin = m_constBegin;
+			m_draw.m_constEnd   = m_constEnd;
+			m_draw.m_flags |= m_flags;
+			m_renderItem[m_numRenderItems].draw = m_draw;
+			++m_numRenderItems;
 		}
 		}
 
 
-		m_state.clear();
+		m_draw.clear();
+		m_constBegin = m_constEnd;
 		m_flags = BGFX_STATE_NONE;
 		m_flags = BGFX_STATE_NONE;
 
 
 		return m_num;
 		return m_num;
 	}
 	}
 
 
+	uint32_t Frame::dispatch(uint8_t _id, ProgramHandle _handle, uint16_t _numX, uint16_t _numY, uint16_t _numZ) // Queue a compute item for view `_id`; returns the current m_num.
+	{
+		if (m_discard) // A pending discard flag drops the accumulated state instead of queuing anything.
+		{
+			discard();
+			return m_num;
+		}
+
+		if (BGFX_CONFIG_MAX_DRAW_CALLS-1 <= m_num) // Compute items share the sort-key budget with draw calls; over budget the call is counted as dropped.
+		{
+			++m_numDropped;
+			return m_num;
+		}
+
+		m_constEnd = m_constantBuffer->getPos(); // End of this item's constant range (presumably the buffer's current write position — confirm).
+
+		m_compute.m_numX = bx::uint16_max(_numX, 1); // Assumes bx::uint16_max returns the larger value, i.e. a zero count becomes 1.
+		m_compute.m_numY = bx::uint16_max(_numY, 1);
+		m_compute.m_numZ = bx::uint16_max(_numZ, 1);
+		m_key.m_program = _handle.idx;
+		if (invalidHandle != m_key.m_program) // Items with an invalid program handle are not queued.
+		{
+			m_key.m_depth = 0;
+			m_key.m_view = _id;
+			m_key.m_seq = s_ctx->m_seq[_id] & s_ctx->m_seqMask[_id];
+			s_ctx->m_seq[_id]++;
+			uint64_t key = m_key.encodeCompute(); // Compute keys are encoded without the SORT_KEY_RENDER_DRAW bit.
+			m_sortKeys[m_num] = key;
+			m_sortValues[m_num] = m_numRenderItems; // Sort value is the index into m_renderItem.
+			++m_num;
+
+			m_compute.m_constBegin = m_constBegin;
+			m_compute.m_constEnd   = m_constEnd;
+			m_renderItem[m_numRenderItems].compute = m_compute;
+			++m_numRenderItems;
+		}
+
+		m_compute.clear(); // Pending compute state is reset whether or not the item was queued.
+		m_constBegin = m_constEnd; // The next item's constant range starts where this one ended.
+
+		return m_num;
+	}
+
 	void Frame::sort()
 	void Frame::sort()
 	{
 	{
 		bx::radixSort64(m_sortKeys, s_ctx->m_tempKeys, m_sortValues, s_ctx->m_tempValues, m_num);
 		bx::radixSort64(m_sortKeys, s_ctx->m_tempKeys, m_sortValues, s_ctx->m_tempValues, m_num);
@@ -821,6 +872,7 @@ namespace bgfx
 		CAPS_FLAGS(BGFX_CAPS_RENDERER_MULTITHREADED),
 		CAPS_FLAGS(BGFX_CAPS_RENDERER_MULTITHREADED),
 		CAPS_FLAGS(BGFX_CAPS_FRAGMENT_DEPTH),
 		CAPS_FLAGS(BGFX_CAPS_FRAGMENT_DEPTH),
 		CAPS_FLAGS(BGFX_CAPS_BLEND_INDEPENDENT),
 		CAPS_FLAGS(BGFX_CAPS_BLEND_INDEPENDENT),
+		CAPS_FLAGS(BGFX_CAPS_COMPUTE),
 #undef CAPS_FLAGS
 #undef CAPS_FLAGS
 	};
 	};
 
 
@@ -2113,6 +2165,19 @@ again:
 		return handle;
 		return handle;
 	}
 	}
 
 
+	ProgramHandle createProgram(ShaderHandle _vsh, bool _destroyShaders) // Create a program from a single shader. NOTE(review): presumably the compute-shader overload; the `_vsh` name looks inherited from the vertex/fragment variant — confirm.
+	{
+		BGFX_CHECK_MAIN_THREAD();
+		ProgramHandle handle = s_ctx->createProgram(_vsh);
+
+		if (_destroyShaders) // Optionally release the caller's shader handle once the program has been created.
+		{
+			destroyShader(_vsh);
+		}
+
+		return handle;
+	}
+
 	void destroyProgram(ProgramHandle _handle)
 	void destroyProgram(ProgramHandle _handle)
 	{
 	{
 		BGFX_CHECK_MAIN_THREAD();
 		BGFX_CHECK_MAIN_THREAD();
@@ -2592,6 +2657,24 @@ again:
 		return s_ctx->submitMask(_viewMask, _depth);
 		return s_ctx->submitMask(_viewMask, _depth);
 	}
 	}
 
 
+	void setImage(uint8_t _stage, UniformHandle _sampler, TextureHandle _handle, uint8_t _mip, TextureFormat::Enum _format, Access::Enum _access) // Public entry point for the texture overload.
+	{
+		BGFX_CHECK_MAIN_THREAD();
+		s_ctx->setImage(_stage, _sampler, _handle, _mip, _format, _access); // Forwards all arguments unchanged to the context.
+	}
+
+	void setImage(uint8_t _stage, UniformHandle _sampler, FrameBufferHandle _handle, uint8_t _attachment, TextureFormat::Enum _format, Access::Enum _access) // Public entry point for the frame buffer overload.
+	{
+		BGFX_CHECK_MAIN_THREAD();
+		s_ctx->setImage(_stage, _sampler, _handle, _attachment, _format, _access); // Forwards all arguments unchanged to the context.
+	}
+
+	void dispatch(uint8_t _id, ProgramHandle _handle, uint16_t _numX, uint16_t _numY, uint16_t _numZ) // Public entry point for queuing a compute dispatch.
+	{
+		BGFX_CHECK_MAIN_THREAD();
+		s_ctx->dispatch(_id, _handle, _numX, _numY, _numZ); // Forwards to the context; nothing is returned to the caller.
+	}
+
 	void discard()
 	void discard()
 	{
 	{
 		BGFX_CHECK_MAIN_THREAD();
 		BGFX_CHECK_MAIN_THREAD();

+ 121 - 0
src/bgfx_compute.sh

@@ -0,0 +1,121 @@
+/*
+ * Copyright 2011-2014 Branimir Karadzic. All rights reserved.
+ * License: http://www.opensource.org/licenses/BSD-2-Clause
+ */
+
+#ifndef BGFX_COMPUTE_H_HEADER_GUARD
+#define BGFX_COMPUTE_H_HEADER_GUARD
+
+#ifndef __cplusplus
+
+#if BGFX_SHADER_LANGUAGE_HLSL
+
+#define IMAGE2D_RO(_name, _reg) Texture2D           _name : register(t[_reg])
+#define IMAGE2D_RW(_name, _reg) RWTexture2D<float4> _name : register(u[_reg])
+#define IMAGE2D_WR(_name, _reg) IMAGE2D_RW(_name, _reg)
+
+// Structured buffer declarations. Read-only buffers are shader resource views and bind to `t`
+// registers; read-write buffers are unordered access views and bind to `u` registers, the same
+// split used by IMAGE2D_RO / IMAGE2D_RW. (`b` registers are constant-buffer slots.)
+#define BUFFER_RO(_name, _struct, _reg) StructuredBuffer<_struct>   _name : register(t[_reg])
+#define BUFFER_RW(_name, _struct, _reg) RWStructuredBuffer<_struct> _name : register(u[_reg])
+#define BUFFER_WR(_name, _struct, _reg) BUFFER_RW(_name, _struct, _reg)
+
+#define NUM_THREADS(_x, _y, _z) [numthreads(_x, _y, _z)]
+
+vec4 imageLoad(Texture2D _image, ivec2 _uv)
+{
+	return _image.Load(uint3(_uv.xy, 0) );
+}
+
+ivec2 imageSize(Texture2D _image)
+{
+	ivec2 result;
+	_image.GetDimensions(result.x, result.y);
+	return result;
+}
+
+//vec4 imageLoad(RWTexture2D<float4> _image, ivec2 _uv)
+//{
+//	return _image[_uv];
+//}
+
+ivec2 imageSize(RWTexture2D<float4> _image)
+{
+	ivec2 result;
+	_image.GetDimensions(result.x, result.y);
+	return result;
+}
+
+void imageStore(RWTexture2D<float4> _image, ivec2 _uv, vec4 _rgba)
+{
+	_image[_uv] = _rgba;
+}
+
+#define __ATOMIC_IMPL_TYPE(_genType, _glFunc, _dxFunc) \
+			_genType _glFunc(_genType _mem, _genType _data) \
+			{ \
+				_genType result; \
+				_dxFunc(_mem, _data, result); \
+				return result; \
+			}
+
+#define __ATOMIC_IMPL(_glFunc, _dxFunc) \
+			__ATOMIC_IMPL_TYPE(int,  _glFunc, _dxFunc) \
+			__ATOMIC_IMPL_TYPE(uint, _glFunc, _dxFunc)
+
+__ATOMIC_IMPL(atomicAdd,      InterlockedAdd);
+__ATOMIC_IMPL(atomicAnd,      InterlockedAnd);
+__ATOMIC_IMPL(atomicExchange, InterlockedExchange);
+__ATOMIC_IMPL(atomicMax,      InterlockedMax);
+__ATOMIC_IMPL(atomicMin,      InterlockedMin);
+__ATOMIC_IMPL(atomicOr,       InterlockedOr);
+__ATOMIC_IMPL(atomicXor,      InterlockedXor);
+
+int atomicCompSwap(int _mem, int _compare, int _data)
+{
+	int result;
+	InterlockedCompareExchange(_mem, _compare, _data, result);
+	return result;
+}
+
+uint atomicCompSwap(uint _mem, uint _compare, uint _data)
+{
+	uint result;
+	InterlockedCompareExchange(_mem, _compare, _data, result);
+	return result;
+}
+
+// InterlockedCompareStore
+
+#define barrier()                    GroupMemoryBarrierWithGroupSync()
+#define memoryBarrier()              GroupMemoryBarrierWithGroupSync()
+#define memoryBarrierAtomicCounter() GroupMemoryBarrierWithGroupSync()
+#define memoryBarrierBuffer()        GroupMemoryBarrierWithGroupSync()
+#define memoryBarrierImage()         GroupMemoryBarrierWithGroupSync()
+#define memoryBarrierShared()        GroupMemoryBarrierWithGroupSync()
+#define groupMemoryBarrier()         GroupMemoryBarrierWithGroupSync()
+
+#else
+
+#define __IMAGE2D_XX(_name, _reg, _access) \
+			layout(rgba8, binding=_reg) _access uniform highp image2D _name
+
+#define IMAGE2D_RO(_name, _reg) __IMAGE2D_XX(_name, _reg, readonly)
+#define IMAGE2D_RW(_name, _reg) __IMAGE2D_XX(_name, _reg, readwrite)
+#define IMAGE2D_WR(_name, _reg) __IMAGE2D_XX(_name, _reg, writeonly)
+
+#define __BUFFER_XX(_name, _type, _reg, _access) \
+			layout(std430, binding=_reg) _access buffer _name ## Buffer \
+			{ \
+				_type _name[]; \
+			}
+
+#define BUFFER_RO(_name, _type, _reg) __BUFFER_XX(_name, _type, _reg, readonly)
+#define BUFFER_RW(_name, _type, _reg) __BUFFER_XX(_name, _type, _reg, readwrite)
+#define BUFFER_WR(_name, _type, _reg) __BUFFER_XX(_name, _type, _reg, writeonly)
+
+#define NUM_THREADS(_x, _y, _z) layout (local_size_x = _x, local_size_y = _y, local_size_z = _z) in;
+
+#endif // BGFX_SHADER_LANGUAGE_HLSL
+
+#endif // __cplusplus
+
+#endif // BGFX_COMPUTE_H_HEADER_GUARD

+ 225 - 73
src/bgfx_p.h

@@ -138,6 +138,8 @@ namespace stl
 #define BGFX_STATE_TEX_MASK  UINT64_C(0xff00000000000000)
 #define BGFX_STATE_TEX_MASK  UINT64_C(0xff00000000000000)
 #define BGFX_STATE_TEX_COUNT 8
 #define BGFX_STATE_TEX_COUNT 8
 
 
+#define BGFX_MAX_COMPUTE_BINDINGS 8
+
 #define BGFX_SAMPLER_DEFAULT_FLAGS UINT32_C(0x10000000)
 #define BGFX_SAMPLER_DEFAULT_FLAGS UINT32_C(0x10000000)
 
 
 #define BGFX_RENDERER_DIRECT3D9_NAME "Direct3D 9"
 #define BGFX_RENDERER_DIRECT3D9_NAME "Direct3D 9"
@@ -623,32 +625,60 @@ namespace bgfx
 		void operator=(const CommandBuffer&);
 		void operator=(const CommandBuffer&);
 	};
 	};
 
 
+#define SORT_KEY_RENDER_DRAW UINT64_C(0x0000000800000000)
 	struct SortKey
 	struct SortKey
 	{
 	{
-		uint64_t encode()
+		uint64_t encodeDraw()
+		{
+			// |               3               2               1               0|
+			// |fedcba9876543210fedcba9876543210fedcba9876543210fedcba9876543210|
+			// |            vvvvvsssssssssssdttpppppppppdddddddddddddddddddddddd|
+			// |                ^          ^^ ^        ^                       ^|
+			// |                |          || |        |                       ||
+			// |           view-+      seq-+| +-trans  +-program         depth-+|
+			// |                            +-draw                              |
+
+			const uint64_t depth   = m_depth;
+			const uint64_t program = uint64_t(m_program)<<0x18;
+			const uint64_t trans   = uint64_t(m_trans  )<<0x21;
+			const uint64_t seq     = uint64_t(m_seq    )<<0x24;
+			const uint64_t view    = uint64_t(m_view   )<<0x2f;
+			const uint64_t key     = depth|program|trans|SORT_KEY_RENDER_DRAW|seq|view;
+			return key;
+		}
+
+		uint64_t encodeCompute()
 		{
 		{
 			// |               3               2               1               0|
 			// |               3               2               1               0|
 			// |fedcba9876543210fedcba9876543210fedcba9876543210fedcba9876543210|
 			// |fedcba9876543210fedcba9876543210fedcba9876543210fedcba9876543210|
-			// |             vvvvvsssssssssssttmmmmmmmmmdddddddddddddddddddddddd|
-			// |                 ^          ^ ^        ^                       ^|
-			// |                 |          | |        |                       ||
-
-			const uint64_t tmp0 = m_depth;
-			const uint64_t tmp1 = uint64_t(m_program)<<0x18;
-			const uint64_t tmp2 = uint64_t(m_trans  )<<0x21;
-			const uint64_t tmp3 = uint64_t(m_seq    )<<0x23;
-			const uint64_t tmp4 = uint64_t(m_view   )<<0x2e;
-			const uint64_t key  = tmp0|tmp1|tmp2|tmp3|tmp4;
+			// |            vvvvvsssssssssssdppppppppp                          |
+			// |                ^          ^^        ^                          |
+			// |                |          ||        |                          |
+			// |           view-+      seq-+|        +-program                  |
+			// |                            +-draw                              |
+
+			const uint64_t program = uint64_t(m_program)<<0x1a;
+			const uint64_t seq     = uint64_t(m_seq    )<<0x24;
+			const uint64_t view    = uint64_t(m_view   )<<0x2f;
+			const uint64_t key     = program|seq|view;
 			return key;
 			return key;
 		}
 		}
 
 
-		void decode(uint64_t _key)
+		/// Returns true if item is a compute command, false if it is a draw call.
+		bool decode(uint64_t _key)
 		{
 		{
-			m_depth   =  _key       & 0xffffffff;
-			m_program = (_key>>0x18)&(BGFX_CONFIG_MAX_PROGRAMS-1);
-			m_trans   = (_key>>0x21)& 0x3;
-			m_seq     = (_key>>0x23)& 0x7ff;
-			m_view    = (_key>>0x2e)&(BGFX_CONFIG_MAX_VIEWS-1);
+			m_seq     = (_key>>0x24)& 0x7ff;
+			m_view    = (_key>>0x2f)&(BGFX_CONFIG_MAX_VIEWS-1);
+			if (_key & SORT_KEY_RENDER_DRAW)
+			{
+				m_depth   =  _key       & 0xffffffff;
+				m_program = (_key>>0x18)&(BGFX_CONFIG_MAX_PROGRAMS-1);
+				m_trans   = (_key>>0x21)& 0x3;
+				return false; // draw
+			}
+
+			m_program = (_key>>0x1a)&(BGFX_CONFIG_MAX_PROGRAMS-1);
+			return true; // compute
 		}
 		}
 
 
 		void reset()
 		void reset()
@@ -660,12 +690,13 @@ namespace bgfx
 			m_trans   = 0;
 			m_trans   = 0;
 		}
 		}
 
 
-		int32_t m_depth;
+		int32_t  m_depth;
 		uint16_t m_program;
 		uint16_t m_program;
 		uint16_t m_seq;
 		uint16_t m_seq;
-		uint8_t m_view;
-		uint8_t m_trans;
+		uint8_t  m_view;
+		uint8_t  m_trans;
 	};
 	};
+#undef SORT_KEY_RENDER_DRAW // Matches the #define placed just before SortKey so the macro stays local to it.
 
 
 	BX_ALIGN_STRUCT_16(struct) Matrix4
 	BX_ALIGN_STRUCT_16(struct) Matrix4
 	{
 	{
@@ -749,12 +780,6 @@ namespace bgfx
 		uint32_t m_num;
 		uint32_t m_num;
 	};
 	};
 
 
-	struct Sampler
-	{
-		uint32_t m_flags;
-		uint16_t m_idx;
-	};
-
 #define CONSTANT_OPCODE_TYPE_SHIFT 27
 #define CONSTANT_OPCODE_TYPE_SHIFT 27
 #define CONSTANT_OPCODE_TYPE_MASK  UINT32_C(0xf8000000)
 #define CONSTANT_OPCODE_TYPE_MASK  UINT32_C(0xf8000000)
 #define CONSTANT_OPCODE_LOC_SHIFT  11
 #define CONSTANT_OPCODE_LOC_SHIFT  11
@@ -934,17 +959,18 @@ namespace bgfx
  		UniformHashMap m_uniforms;
  		UniformHashMap m_uniforms;
  	};
  	};
 
 
-	struct RenderState
+	struct Sampler
 	{
 	{
-		void reset()
-		{
-			m_constEnd = 0;
-			clear();
-		}
+		uint32_t m_flags;
+		uint16_t m_idx;
+	};
 
 
+	struct RenderDraw
+	{
 		void clear()
 		void clear()
 		{
 		{
-			m_constBegin = m_constEnd;
+			m_constBegin = 0;
+			m_constEnd   = 0;
 			m_flags = BGFX_STATE_DEFAULT;
 			m_flags = BGFX_STATE_DEFAULT;
 			m_stencil = packStencil(BGFX_STENCIL_DEFAULT, BGFX_STENCIL_DEFAULT);
 			m_stencil = packStencil(BGFX_STENCIL_DEFAULT, BGFX_STENCIL_DEFAULT);
 			m_rgba = 0;
 			m_rgba = 0;
@@ -987,12 +1013,61 @@ namespace bgfx
 		uint16_t m_scissor;
 		uint16_t m_scissor;
 
 
 		VertexBufferHandle m_vertexBuffer;
 		VertexBufferHandle m_vertexBuffer;
-		VertexDeclHandle m_vertexDecl;
-		IndexBufferHandle m_indexBuffer;
+		VertexDeclHandle   m_vertexDecl;
+		IndexBufferHandle  m_indexBuffer;
 		VertexBufferHandle m_instanceDataBuffer;
 		VertexBufferHandle m_instanceDataBuffer;
 		Sampler m_sampler[BGFX_STATE_TEX_COUNT];
 		Sampler m_sampler[BGFX_STATE_TEX_COUNT];
 	};
 	};
 
 
+	struct ComputeBinding // One resource slot of a compute dispatch.
+	{
+		enum Enum // Kind of resource held in the slot; stored in m_type.
+		{
+			Image,
+			Buffer,
+			
+			Count
+		};
+
+		uint16_t m_idx; // Handle index of the bound resource; invalidHandle marks an unused slot.
+		uint8_t m_format; // TextureFormat::Enum narrowed to uint8_t.
+		uint8_t m_access; // Access::Enum narrowed to uint8_t.
+		uint8_t m_mip; // Mip level given to setImage().
+		uint8_t m_type; // ComputeBinding::Enum narrowed to uint8_t.
+	};
+
+	struct RenderCompute // State recorded for one compute dispatch.
+	{
+		void clear() // Reset to "nothing bound": zero ranges and counts, every binding index invalid.
+		{
+			m_constBegin = 0;
+			m_constEnd   = 0;
+			m_numX = 0;
+			m_numY = 0;
+			m_numZ = 0;
+
+			for (uint32_t ii = 0; ii < BGFX_MAX_COMPUTE_BINDINGS; ++ii)
+			{
+				m_bind[ii].m_idx = invalidHandle; // Only the index is reset; the other binding fields keep their old values.
+			}
+		}
+
+		uint32_t m_constBegin; // Start of this dispatch's range in the frame's constant buffer.
+		uint32_t m_constEnd; // End of that range.
+
+		uint16_t m_numX; // Dispatch counts along X, Y and Z.
+		uint16_t m_numY;
+		uint16_t m_numZ;
+
+		ComputeBinding m_bind[BGFX_MAX_COMPUTE_BINDINGS]; // Indexed by the `_stage` argument of setImage().
+	};
+
+	union RenderItem // One queued frame item; submit() writes `draw`, dispatch() writes `compute`.
+	{
+		RenderDraw    draw; // Written together with a sort key from SortKey::encodeDraw().
+		RenderCompute compute; // Written together with a sort key from SortKey::encodeCompute().
+	};
+
 	struct Resolution
 	struct Resolution
 	{
 	{
 		Resolution()
 		Resolution()
@@ -1063,12 +1138,15 @@ namespace bgfx
 		void start()
 		void start()
 		{
 		{
 			m_flags = BGFX_STATE_NONE;
 			m_flags = BGFX_STATE_NONE;
-			m_state.reset();
+			m_constBegin = 0;
+			m_constEnd   = 0;
+			m_draw.clear();
+			m_compute.clear();
 			m_matrixCache.reset();
 			m_matrixCache.reset();
 			m_rectCache.reset();
 			m_rectCache.reset();
 			m_key.reset();
 			m_key.reset();
 			m_num = 0;
 			m_num = 0;
-			m_numRenderStates = 0;
+			m_numRenderItems = 0;
 			m_numDropped = 0;
 			m_numDropped = 0;
 			m_iboffset = 0;
 			m_iboffset = 0;
 			m_vboffset = 0;
 			m_vboffset = 0;
@@ -1105,86 +1183,86 @@ namespace bgfx
 			uint8_t blend = ( (_state&BGFX_STATE_BLEND_MASK)>>BGFX_STATE_BLEND_SHIFT)&0xff;
 			uint8_t blend = ( (_state&BGFX_STATE_BLEND_MASK)>>BGFX_STATE_BLEND_SHIFT)&0xff;
 			// transparency sort order table
 			// transparency sort order table
 			m_key.m_trans = "\x0\x1\x1\x2\x2\x1\x2\x1\x2\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1"[( (blend)&0xf) + (!!blend)];
 			m_key.m_trans = "\x0\x1\x1\x2\x2\x1\x2\x1\x2\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1"[( (blend)&0xf) + (!!blend)];
-			m_state.m_flags = _state;
-			m_state.m_rgba = _rgba;
+			m_draw.m_flags = _state;
+			m_draw.m_rgba = _rgba;
 		}
 		}
 
 
 		void setStencil(uint32_t _fstencil, uint32_t _bstencil)
 		void setStencil(uint32_t _fstencil, uint32_t _bstencil)
 		{
 		{
-			m_state.m_stencil = packStencil(_fstencil, _bstencil);
+			m_draw.m_stencil = packStencil(_fstencil, _bstencil);
 		}
 		}
 
 
 		uint16_t setScissor(uint16_t _x, uint16_t _y, uint16_t _width, uint16_t _height)
 		uint16_t setScissor(uint16_t _x, uint16_t _y, uint16_t _width, uint16_t _height)
 		{
 		{
 			uint16_t scissor = (uint16_t)m_rectCache.add(_x, _y, _width, _height);
 			uint16_t scissor = (uint16_t)m_rectCache.add(_x, _y, _width, _height);
-			m_state.m_scissor = scissor;
+			m_draw.m_scissor = scissor;
 			return scissor;
 			return scissor;
 		}
 		}
 
 
 		void setScissor(uint16_t _cache)
 		void setScissor(uint16_t _cache)
 		{
 		{
-			m_state.m_scissor = _cache;
+			m_draw.m_scissor = _cache;
 		}
 		}
 
 
 		uint32_t setTransform(const void* _mtx, uint16_t _num)
 		uint32_t setTransform(const void* _mtx, uint16_t _num)
 		{
 		{
-			m_state.m_matrix = m_matrixCache.add(_mtx, _num);
-			m_state.m_num = _num;
+			m_draw.m_matrix = m_matrixCache.add(_mtx, _num);
+			m_draw.m_num = _num;
 
 
-			return m_state.m_matrix;
+			return m_draw.m_matrix;
 		}
 		}
 
 
 		void setTransform(uint32_t _cache, uint16_t _num)
 		void setTransform(uint32_t _cache, uint16_t _num)
 		{
 		{
-			m_state.m_matrix = _cache;
-			m_state.m_num = _num;
+			m_draw.m_matrix = _cache;
+			m_draw.m_num = _num;
 		}
 		}
 
 
 		void setIndexBuffer(IndexBufferHandle _handle, uint32_t _firstIndex, uint32_t _numIndices)
 		void setIndexBuffer(IndexBufferHandle _handle, uint32_t _firstIndex, uint32_t _numIndices)
 		{
 		{
-			m_state.m_startIndex = _firstIndex;
-			m_state.m_numIndices = _numIndices;
-			m_state.m_indexBuffer = _handle;
+			m_draw.m_startIndex = _firstIndex;
+			m_draw.m_numIndices = _numIndices;
+			m_draw.m_indexBuffer = _handle;
 		}
 		}
 
 
 		void setIndexBuffer(const TransientIndexBuffer* _tib, uint32_t _firstIndex, uint32_t _numIndices)
 		void setIndexBuffer(const TransientIndexBuffer* _tib, uint32_t _firstIndex, uint32_t _numIndices)
 		{
 		{
-			m_state.m_indexBuffer = _tib->handle;
-			m_state.m_startIndex = _firstIndex;
-			m_state.m_numIndices = _numIndices;
+			m_draw.m_indexBuffer = _tib->handle;
+			m_draw.m_startIndex = _firstIndex;
+			m_draw.m_numIndices = _numIndices;
 			m_discard = 0 == _numIndices;
 			m_discard = 0 == _numIndices;
 		}
 		}
 
 
 		void setVertexBuffer(VertexBufferHandle _handle, uint32_t _startVertex, uint32_t _numVertices)
 		void setVertexBuffer(VertexBufferHandle _handle, uint32_t _startVertex, uint32_t _numVertices)
 		{
 		{
 			BX_CHECK(_handle.idx < BGFX_CONFIG_MAX_VERTEX_BUFFERS, "Invalid vertex buffer handle. %d (< %d)", _handle.idx, BGFX_CONFIG_MAX_VERTEX_BUFFERS);
 			BX_CHECK(_handle.idx < BGFX_CONFIG_MAX_VERTEX_BUFFERS, "Invalid vertex buffer handle. %d (< %d)", _handle.idx, BGFX_CONFIG_MAX_VERTEX_BUFFERS);
-			m_state.m_startVertex = _startVertex;
-			m_state.m_numVertices = _numVertices;
-			m_state.m_vertexBuffer = _handle;
+			m_draw.m_startVertex = _startVertex;
+			m_draw.m_numVertices = _numVertices;
+			m_draw.m_vertexBuffer = _handle;
 		}
 		}
 
 
 		void setVertexBuffer(const DynamicVertexBuffer& _dvb, uint32_t _numVertices)
 		void setVertexBuffer(const DynamicVertexBuffer& _dvb, uint32_t _numVertices)
 		{
 		{
-			m_state.m_startVertex = _dvb.m_startVertex;
-			m_state.m_numVertices = bx::uint32_min(_dvb.m_numVertices, _numVertices);
-			m_state.m_vertexBuffer = _dvb.m_handle;
-			m_state.m_vertexDecl = _dvb.m_decl;
+			m_draw.m_startVertex = _dvb.m_startVertex;
+			m_draw.m_numVertices = bx::uint32_min(_dvb.m_numVertices, _numVertices);
+			m_draw.m_vertexBuffer = _dvb.m_handle;
+			m_draw.m_vertexDecl = _dvb.m_decl;
 		}
 		}
 
 
 		void setVertexBuffer(const TransientVertexBuffer* _tvb, uint32_t _startVertex, uint32_t _numVertices)
 		void setVertexBuffer(const TransientVertexBuffer* _tvb, uint32_t _startVertex, uint32_t _numVertices)
 		{
 		{
-			m_state.m_startVertex = _startVertex;
-			m_state.m_numVertices = bx::uint32_min(_tvb->size/_tvb->stride, _numVertices);
-			m_state.m_vertexBuffer = _tvb->handle;
-			m_state.m_vertexDecl = _tvb->decl;
+			m_draw.m_startVertex = _startVertex;
+			m_draw.m_numVertices = bx::uint32_min(_tvb->size/_tvb->stride, _numVertices);
+			m_draw.m_vertexBuffer = _tvb->handle;
+			m_draw.m_vertexDecl = _tvb->decl;
 		}
 		}
 
 
 		void setInstanceDataBuffer(const InstanceDataBuffer* _idb, uint16_t _num)
 		void setInstanceDataBuffer(const InstanceDataBuffer* _idb, uint16_t _num)
 		{
 		{
- 			m_state.m_instanceDataOffset = _idb->offset;
-			m_state.m_instanceDataStride = _idb->stride;
-			m_state.m_numInstances = bx::uint16_min( (uint16_t)_idb->num, _num);
-			m_state.m_instanceDataBuffer = _idb->handle;
+ 			m_draw.m_instanceDataOffset = _idb->offset;
+			m_draw.m_instanceDataStride = _idb->stride;
+			m_draw.m_numInstances = bx::uint16_min( (uint16_t)_idb->num, _num);
+			m_draw.m_instanceDataBuffer = _idb->handle;
 			BX_FREE(g_allocator, const_cast<InstanceDataBuffer*>(_idb) );
 			BX_FREE(g_allocator, const_cast<InstanceDataBuffer*>(_idb) );
 		}
 		}
 
 
@@ -1197,7 +1275,7 @@ namespace bgfx
 		void setTexture(uint8_t _stage, UniformHandle _sampler, TextureHandle _handle, uint32_t _flags)
 		void setTexture(uint8_t _stage, UniformHandle _sampler, TextureHandle _handle, uint32_t _flags)
 		{
 		{
 			m_flags |= BGFX_STATE_TEX0<<_stage;
 			m_flags |= BGFX_STATE_TEX0<<_stage;
-			Sampler& sampler = m_state.m_sampler[_stage];
+			Sampler& sampler = m_draw.m_sampler[_stage];
 			sampler.m_idx = _handle.idx;
 			sampler.m_idx = _handle.idx;
 			sampler.m_flags = (_flags&BGFX_SAMPLER_DEFAULT_FLAGS) ? BGFX_SAMPLER_DEFAULT_FLAGS : _flags;
 			sampler.m_flags = (_flags&BGFX_SAMPLER_DEFAULT_FLAGS) ? BGFX_SAMPLER_DEFAULT_FLAGS : _flags;
 
 
@@ -1209,15 +1287,34 @@ namespace bgfx
 			}
 			}
 		}
 		}
 
 
+		void setImage(uint8_t _stage, UniformHandle _sampler, TextureHandle _handle, uint8_t _mip, TextureFormat::Enum _format, Access::Enum _access) // Records an image binding for slot _stage in the pending compute state (m_compute).
+		{
+			ComputeBinding& bind = m_compute.m_bind[_stage]; // NOTE(review): _stage is not range-checked here; confirm callers keep it within m_bind.
+			bind.m_idx     = _handle.idx; // texture to bind
+			bind.m_format  = uint8_t(_format); // enum narrowed to a byte for storage
+			bind.m_access  = uint8_t(_access); // enum narrowed to a byte for storage
+			bind.m_mip     = _mip; // mip level of the texture to expose
+			bind.m_type    = uint8_t(ComputeBinding::Image); // tags this slot as an image binding
+
+			if (isValid(_sampler) // only when a sampler uniform handle was supplied...
+			&& (BX_ENABLED(BGFX_CONFIG_RENDERER_OPENGL) || BX_ENABLED(BGFX_CONFIG_RENDERER_OPENGLES) ) ) // ...and an OpenGL or OpenGL ES renderer is enabled in the build config
+			{
+				uint32_t stage = _stage; // widened to 32 bits before being passed by pointer
+				setUniform(_sampler, &stage); // the sampler uniform receives the stage index as its value
+			}
+		}
+
 		void discard()
 		void discard()
 		{
 		{
 			m_discard = false;
 			m_discard = false;
-			m_state.clear();
+			m_draw.clear();
+			m_compute.clear();
 			m_flags = BGFX_STATE_NONE;
 			m_flags = BGFX_STATE_NONE;
 		}
 		}
 
 
 		uint32_t submit(uint8_t _id, int32_t _depth);
 		uint32_t submit(uint8_t _id, int32_t _depth);
 		uint32_t submitMask(uint32_t _viewMask, int32_t _depth);
 		uint32_t submitMask(uint32_t _viewMask, int32_t _depth);
+		uint32_t dispatch(uint8_t _id, ProgramHandle _handle, uint16_t _ngx, uint16_t _ngy, uint16_t _ngz);
 		void sort();
 		void sort();
 
 
 		bool checkAvailTransientIndexBuffer(uint32_t _num)
 		bool checkAvailTransientIndexBuffer(uint32_t _num)
@@ -1332,14 +1429,17 @@ namespace bgfx
 
 
 		uint64_t m_sortKeys[BGFX_CONFIG_MAX_DRAW_CALLS];
 		uint64_t m_sortKeys[BGFX_CONFIG_MAX_DRAW_CALLS];
 		uint16_t m_sortValues[BGFX_CONFIG_MAX_DRAW_CALLS];
 		uint16_t m_sortValues[BGFX_CONFIG_MAX_DRAW_CALLS];
-		RenderState m_renderState[BGFX_CONFIG_MAX_DRAW_CALLS];
-		RenderState m_state;
+		RenderItem m_renderItem[BGFX_CONFIG_MAX_DRAW_CALLS];
+		RenderDraw m_draw;
+		RenderCompute m_compute;
 		uint64_t m_flags;
 		uint64_t m_flags;
+		uint32_t m_constBegin;
+		uint32_t m_constEnd;
 
 
 		ConstantBuffer* m_constantBuffer;
 		ConstantBuffer* m_constantBuffer;
 
 
 		uint16_t m_num;
 		uint16_t m_num;
-		uint16_t m_numRenderStates;
+		uint16_t m_numRenderItems;
 		uint16_t m_numDropped;
 		uint16_t m_numDropped;
 
 
 		MatrixCache m_matrixCache;
 		MatrixCache m_matrixCache;
@@ -2213,6 +2313,35 @@ namespace bgfx
 			return handle;
 			return handle;
 		}
 		}
 
 
+		BGFX_API_FUNC(ProgramHandle createProgram(ShaderHandle _vsh) ) // Creates a program from a single shader; returns an invalid handle on failure.
+		{
+			if (!isValid(_vsh) ) // reject an invalid shader handle up front
+			{
+				BX_WARN(false, "Vertex/fragment shader is invalid (vsh %d).", _vsh.idx); // NOTE(review): text mentions vertex/fragment although this overload takes one shader (presumably a compute shader) - confirm wording.
+				ProgramHandle invalid = BGFX_INVALID_HANDLE;
+				return invalid;
+			}
+
+			ProgramHandle handle;
+			handle.idx = m_programHandle.alloc(); // may produce an invalid index; checked below
+
+			BX_WARN(isValid(handle), "Failed to allocate program handle.");
+			if (isValid(handle) ) // only record and queue the program when allocation succeeded
+			{
+				shaderIncRef(_vsh); // the program takes a reference on its shader
+				m_programRef[handle.idx].m_vsh = _vsh; // remember which shader this program uses
+
+				CommandBuffer& cmdbuf = getCommandBuffer(CommandBuffer::CreateProgram); // queue a CreateProgram command: handle, shader, second shader
+				cmdbuf.write(handle);
+				cmdbuf.write(_vsh);
+
+				ShaderHandle invalid = BGFX_INVALID_HANDLE; // no second shader for this overload
+				cmdbuf.write(invalid); // presumably fills the fragment shader slot of the command - verify against the command reader
+			}
+
+			return handle; // invalid if allocation failed
+		}
+
 		BGFX_API_FUNC(void destroyProgram(ProgramHandle _handle) )
 		BGFX_API_FUNC(void destroyProgram(ProgramHandle _handle) )
 		{
 		{
 			CommandBuffer& cmdbuf = getCommandBuffer(CommandBuffer::DestroyProgram);
 			CommandBuffer& cmdbuf = getCommandBuffer(CommandBuffer::DestroyProgram);
@@ -2687,6 +2816,29 @@ namespace bgfx
 			return m_submit->submitMask(_viewMask, _depth);
 			return m_submit->submitMask(_viewMask, _depth);
 		}
 		}
 
 
+		BGFX_API_FUNC(void setImage(uint8_t _stage, UniformHandle _sampler, TextureHandle _handle, uint8_t _mip, TextureFormat::Enum _format, Access::Enum _access) ) // Texture overload: forwards the image binding unchanged to the current submit frame.
+		{
+			m_submit->setImage(_stage, _sampler, _handle, _mip, _format, _access); // all arguments passed through as given
+		}
+
+		BGFX_API_FUNC(void setImage(uint8_t _stage, UniformHandle _sampler, FrameBufferHandle _handle, uint8_t _attachment, TextureFormat::Enum _format, Access::Enum _access) ) // Frame buffer overload: resolves attachment _attachment to its texture, then calls the texture overload.
+		{
+			BX_CHECK(_attachment < g_caps.maxFBAttachments, "Frame buffer attachment index %d is invalid.", _attachment); // NOTE(review): whether this check is active in all build configurations is not visible here - confirm.
+			TextureHandle textureHandle = BGFX_INVALID_HANDLE; // stays invalid when _handle is invalid
+			if (isValid(_handle) )
+			{
+				textureHandle = m_frameBufferRef[_handle.idx].m_th[_attachment]; // texture backing the requested attachment
+				BX_CHECK(isValid(textureHandle), "Frame buffer texture %d is invalid.", _attachment);
+			}
+
+			setImage(_stage, _sampler, textureHandle, 0, _format, _access); // always binds mip 0; an invalid texture handle is forwarded as-is
+		}
+
+		BGFX_API_FUNC(uint32_t dispatch(uint8_t _id, ProgramHandle _handle, uint16_t _numX, uint16_t _numY, uint16_t _numZ) ) // Forwards a compute dispatch to the current submit frame and returns its result.
+		{
+			return m_submit->dispatch(_id, _handle, _numX, _numY, _numZ); // NOTE(review): the public bgfx::dispatch declared in the bgfx.h hunk of this change returns void, so this value is presumably dropped by the wrapper - confirm.
+		}
+
 		BGFX_API_FUNC(void discard() )
 		BGFX_API_FUNC(void discard() )
 		{
 		{
 			m_submit->discard();
 			m_submit->discard();

+ 2 - 0
src/config.h

@@ -135,6 +135,7 @@
 #endif //  BGFX_CONFIG_MAX_RECT_CACHE
 #endif //  BGFX_CONFIG_MAX_RECT_CACHE
 
 
 #ifndef BGFX_CONFIG_MAX_VIEWS
 #ifndef BGFX_CONFIG_MAX_VIEWS
+// Do not change. Must be power of 2.
 #	define BGFX_CONFIG_MAX_VIEWS 32
 #	define BGFX_CONFIG_MAX_VIEWS 32
 #endif // BGFX_CONFIG_MAX_VIEWS
 #endif // BGFX_CONFIG_MAX_VIEWS
 
 
@@ -171,6 +172,7 @@
 #endif // BGFX_CONFIG_MAX_FRAGMENT_SHADERS
 #endif // BGFX_CONFIG_MAX_FRAGMENT_SHADERS
 
 
 #ifndef BGFX_CONFIG_MAX_PROGRAMS
 #ifndef BGFX_CONFIG_MAX_PROGRAMS
+// Must be power of 2.
 #	define BGFX_CONFIG_MAX_PROGRAMS 512
 #	define BGFX_CONFIG_MAX_PROGRAMS 512
 #endif // BGFX_CONFIG_MAX_PROGRAMS
 #endif // BGFX_CONFIG_MAX_PROGRAMS
 
 

+ 16 - 2
src/glcontext_egl.cpp

@@ -10,6 +10,14 @@
 
 
 #	if BGFX_USE_EGL
 #	if BGFX_USE_EGL
 
 
+#ifndef EGL_CONTEXT_MAJOR_VERSION_KHR // fallback for EGL headers that do not define the KHR context version attributes
+#	define EGL_CONTEXT_MAJOR_VERSION_KHR EGL_CONTEXT_CLIENT_VERSION // aliased to the existing client version attribute
+#endif // EGL_CONTEXT_MAJOR_VERSION_KHR
+
+#ifndef EGL_CONTEXT_MINOR_VERSION_KHR // same fallback for the minor version attribute
+#	define EGL_CONTEXT_MINOR_VERSION_KHR 0x30FB // presumably the token value from EGL_KHR_create_context - verify against eglext.h
+#endif // EGL_CONTEXT_MINOR_VERSION_KHR
+
 namespace bgfx
 namespace bgfx
 {
 {
 #if BGFX_USE_GL_DYNAMIC_LIB
 #if BGFX_USE_GL_DYNAMIC_LIB
@@ -137,9 +145,15 @@ EGL_IMPORT
 		EGLint contextAttrs[] =
 		EGLint contextAttrs[] =
 		{
 		{
 #	if BGFX_CONFIG_RENDERER_OPENGLES >= 30
 #	if BGFX_CONFIG_RENDERER_OPENGLES >= 30
-			EGL_CONTEXT_CLIENT_VERSION, 3,
+			EGL_CONTEXT_MAJOR_VERSION_KHR, 3,
+#		if BGFX_CONFIG_RENDERER_OPENGLES >= 31
+			EGL_CONTEXT_MINOR_VERSION_KHR, 1,
+#		else
+//			EGL_CONTEXT_MINOR_VERSION_KHR, 0,
+#		endif // BGFX_CONFIG_RENDERER_OPENGLES >= 31
 #	elif BGFX_CONFIG_RENDERER_OPENGLES
 #	elif BGFX_CONFIG_RENDERER_OPENGLES
-			EGL_CONTEXT_CLIENT_VERSION, 2,
+			EGL_CONTEXT_MAJOR_VERSION_KHR, 2,
+//			EGL_CONTEXT_MINOR_VERSION_KHR, 0,
 #	endif // BGFX_CONFIG_RENDERER_
 #	endif // BGFX_CONFIG_RENDERER_
 
 
 			EGL_NONE
 			EGL_NONE

+ 37 - 1
src/glimports.h

@@ -32,13 +32,15 @@
 
 
 #if GL_IMPORT_TYPEDEFS
 #if GL_IMPORT_TYPEDEFS
 typedef void (GL_APIENTRYP GLDEBUGPROC)(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,const void *userParam);
 typedef void (GL_APIENTRYP GLDEBUGPROC)(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,const void *userParam);
-
 typedef void           (GL_APIENTRYP PFNGLACTIVETEXTUREPROC) (GLenum texture);
 typedef void           (GL_APIENTRYP PFNGLACTIVETEXTUREPROC) (GLenum texture);
 typedef void           (GL_APIENTRYP PFNGLATTACHSHADERPROC) (GLuint program, GLuint shader);
 typedef void           (GL_APIENTRYP PFNGLATTACHSHADERPROC) (GLuint program, GLuint shader);
 typedef void           (GL_APIENTRYP PFNGLBEGINQUERYPROC) (GLenum target, GLuint id);
 typedef void           (GL_APIENTRYP PFNGLBEGINQUERYPROC) (GLenum target, GLuint id);
 typedef void           (GL_APIENTRYP PFNGLBINDBUFFERPROC) (GLenum target, GLuint buffer);
 typedef void           (GL_APIENTRYP PFNGLBINDBUFFERPROC) (GLenum target, GLuint buffer);
+typedef void           (GL_APIENTRYP PFNGLBINDBUFFERBASEPROC) (GLenum target, GLuint index, GLuint buffer);
+typedef void           (GL_APIENTRYP PFNGLBINDBUFFERRANGEPROC) (GLenum target, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size);
 typedef void           (GL_APIENTRYP PFNGLBINDFRAGDATALOCATIONPROC) (GLuint program, GLuint color, const GLchar *name);
 typedef void           (GL_APIENTRYP PFNGLBINDFRAGDATALOCATIONPROC) (GLuint program, GLuint color, const GLchar *name);
 typedef void           (GL_APIENTRYP PFNGLBINDFRAMEBUFFERPROC) (GLenum target, GLuint framebuffer);
 typedef void           (GL_APIENTRYP PFNGLBINDFRAMEBUFFERPROC) (GLenum target, GLuint framebuffer);
+typedef void           (GL_APIENTRYP PFNGLBINDIMAGETEXTUREPROC) (GLuint unit, GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum access, GLenum format);
 typedef void           (GL_APIENTRYP PFNGLBINDRENDERBUFFERPROC) (GLenum target, GLuint renderbuffer);
 typedef void           (GL_APIENTRYP PFNGLBINDRENDERBUFFERPROC) (GLenum target, GLuint renderbuffer);
 typedef void           (GL_APIENTRYP PFNGLBINDSAMPLERPROC) (GLuint unit, GLuint sampler);
 typedef void           (GL_APIENTRYP PFNGLBINDSAMPLERPROC) (GLuint unit, GLuint sampler);
 typedef void           (GL_APIENTRYP PFNGLBINDTEXTUREPROC) (GLenum target, GLuint texture);
 typedef void           (GL_APIENTRYP PFNGLBINDTEXTUREPROC) (GLenum target, GLuint texture);
@@ -88,6 +90,8 @@ typedef void           (GL_APIENTRYP PFNGLDETACHSHADERPROC) (GLuint program, GLu
 typedef void           (GL_APIENTRYP PFNGLDISABLEPROC) (GLenum cap);
 typedef void           (GL_APIENTRYP PFNGLDISABLEPROC) (GLenum cap);
 typedef void           (GL_APIENTRYP PFNGLDISABLEIPROC) (GLenum cap, GLuint index);
 typedef void           (GL_APIENTRYP PFNGLDISABLEIPROC) (GLenum cap, GLuint index);
 typedef void           (GL_APIENTRYP PFNGLDISABLEVERTEXATTRIBARRAYPROC) (GLuint index);
 typedef void           (GL_APIENTRYP PFNGLDISABLEVERTEXATTRIBARRAYPROC) (GLuint index);
+typedef void           (GL_APIENTRYP PFNGLDISPATCHCOMPUTEPROC) (GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z);
+typedef void           (GL_APIENTRYP PFNGLDISPATCHCOMPUTEINDIRECTPROC) (GLintptr indirect);
 typedef void           (GL_APIENTRYP PFNGLDRAWARRAYSPROC) (GLenum mode, GLint first, GLsizei count);
 typedef void           (GL_APIENTRYP PFNGLDRAWARRAYSPROC) (GLenum mode, GLint first, GLsizei count);
 typedef void           (GL_APIENTRYP PFNGLDRAWARRAYSINSTANCEDPROC) (GLenum mode, GLint first, GLsizei count, GLsizei instancecount);
 typedef void           (GL_APIENTRYP PFNGLDRAWARRAYSINSTANCEDPROC) (GLenum mode, GLint first, GLsizei count, GLsizei instancecount);
 typedef void           (GL_APIENTRYP PFNGLDRAWBUFFERPROC) (GLenum mode);
 typedef void           (GL_APIENTRYP PFNGLDRAWBUFFERPROC) (GLenum mode);
@@ -120,6 +124,12 @@ typedef void           (GL_APIENTRYP PFNGLGETPOINTERVPROC) (GLenum pname, void *
 typedef void           (GL_APIENTRYP PFNGLGETPROGRAMBINARYPROC) (GLuint program, GLsizei bufSize, GLsizei *length, GLenum *binaryFormat, void *binary);
 typedef void           (GL_APIENTRYP PFNGLGETPROGRAMBINARYPROC) (GLuint program, GLsizei bufSize, GLsizei *length, GLenum *binaryFormat, void *binary);
 typedef void           (GL_APIENTRYP PFNGLGETPROGRAMINFOLOGPROC) (GLuint program, GLsizei bufSize, GLsizei *length, GLchar *infoLog);
 typedef void           (GL_APIENTRYP PFNGLGETPROGRAMINFOLOGPROC) (GLuint program, GLsizei bufSize, GLsizei *length, GLchar *infoLog);
 typedef void           (GL_APIENTRYP PFNGLGETPROGRAMIVPROC) (GLuint program, GLenum pname, GLint *params);
 typedef void           (GL_APIENTRYP PFNGLGETPROGRAMIVPROC) (GLuint program, GLenum pname, GLint *params);
+typedef void           (GL_APIENTRYP PFNGLGETPROGRAMINTERFACEIVPROC) (GLuint program, GLenum programInterface, GLenum pname, GLint *params);
+typedef GLuint         (GL_APIENTRYP PFNGLGETPROGRAMRESOURCEINDEXPROC) (GLuint program, GLenum programInterface, const GLchar *name);
+typedef void           (GL_APIENTRYP PFNGLGETPROGRAMRESOURCEIVPROC) (GLuint program, GLenum programInterface, GLuint index, GLsizei propCount, const GLenum *props, GLsizei bufSize, GLsizei *length, GLint *params);
+typedef void           (GL_APIENTRYP PFNGLGETPROGRAMRESOURCENAMEPROC) (GLuint program, GLenum programInterface, GLuint index, GLsizei bufSize, GLsizei *length, GLchar *name);
+typedef GLint          (GL_APIENTRYP PFNGLGETPROGRAMRESOURCELOCATIONPROC) (GLuint program, GLenum programInterface, const GLchar *name);
+typedef GLint          (GL_APIENTRYP PFNGLGETPROGRAMRESOURCELOCATIONINDEXPROC) (GLuint program, GLenum programInterface, const GLchar *name);
 typedef void           (GL_APIENTRYP PFNGLGETQUERYIVPROC) (GLenum target, GLenum pname, GLint *params);
 typedef void           (GL_APIENTRYP PFNGLGETQUERYIVPROC) (GLenum target, GLenum pname, GLint *params);
 typedef void           (GL_APIENTRYP PFNGLGETQUERYOBJECTIVPROC) (GLuint id, GLenum pname, GLint *params);
 typedef void           (GL_APIENTRYP PFNGLGETQUERYOBJECTIVPROC) (GLuint id, GLenum pname, GLint *params);
 typedef void           (GL_APIENTRYP PFNGLGETQUERYOBJECTI64VPROC) (GLuint id, GLenum pname, GLint64 *params);
 typedef void           (GL_APIENTRYP PFNGLGETQUERYOBJECTI64VPROC) (GLuint id, GLenum pname, GLint64 *params);
@@ -130,6 +140,7 @@ typedef void           (GL_APIENTRYP PFNGLGETSHADERIVPROC) (GLuint shader, GLenu
 typedef const GLubyte* (GL_APIENTRYP PFNGLGETSTRINGPROC) (GLenum name);
 typedef const GLubyte* (GL_APIENTRYP PFNGLGETSTRINGPROC) (GLenum name);
 typedef GLint          (GL_APIENTRYP PFNGLGETUNIFORMLOCATIONPROC) (GLuint program, const GLchar *name);
 typedef GLint          (GL_APIENTRYP PFNGLGETUNIFORMLOCATIONPROC) (GLuint program, const GLchar *name);
 typedef void           (GL_APIENTRYP PFNGLLINKPROGRAMPROC) (GLuint program);
 typedef void           (GL_APIENTRYP PFNGLLINKPROGRAMPROC) (GLuint program);
+typedef void           (GL_APIENTRYP PFNGLMEMORYBARRIERPROC) (GLbitfield barriers);
 typedef void           (GL_APIENTRYP PFNGLOBJECTLABELPROC) (GLenum identifier, GLuint name, GLsizei length, const GLchar *label);
 typedef void           (GL_APIENTRYP PFNGLOBJECTLABELPROC) (GLenum identifier, GLuint name, GLsizei length, const GLchar *label);
 typedef void           (GL_APIENTRYP PFNGLOBJECTPTRLABELPROC) (const void *ptr, GLsizei length, const GLchar *label);
 typedef void           (GL_APIENTRYP PFNGLOBJECTPTRLABELPROC) (const void *ptr, GLsizei length, const GLchar *label);
 typedef void           (GL_APIENTRYP PFNGLPIXELSTOREIPROC) (GLenum pname, GLint param);
 typedef void           (GL_APIENTRYP PFNGLPIXELSTOREIPROC) (GLenum pname, GLint param);
@@ -194,8 +205,11 @@ GL_IMPORT______(false, PFNGLACTIVETEXTUREPROC,                     glActiveTextu
 GL_IMPORT______(false, PFNGLATTACHSHADERPROC,                      glAttachShader);
 GL_IMPORT______(false, PFNGLATTACHSHADERPROC,                      glAttachShader);
 GL_IMPORT______(true,  PFNGLBEGINQUERYPROC,                        glBeginQuery);
 GL_IMPORT______(true,  PFNGLBEGINQUERYPROC,                        glBeginQuery);
 GL_IMPORT______(false, PFNGLBINDBUFFERPROC,                        glBindBuffer);
 GL_IMPORT______(false, PFNGLBINDBUFFERPROC,                        glBindBuffer);
+GL_IMPORT______(true,  PFNGLBINDBUFFERBASEPROC,                    glBindBufferBase);
+GL_IMPORT______(true,  PFNGLBINDBUFFERRANGEPROC,                   glBindBufferRange);
 GL_IMPORT______(true,  PFNGLBINDFRAGDATALOCATIONPROC,              glBindFragDataLocation);
 GL_IMPORT______(true,  PFNGLBINDFRAGDATALOCATIONPROC,              glBindFragDataLocation);
 GL_IMPORT______(true,  PFNGLBINDFRAMEBUFFERPROC,                   glBindFramebuffer);
 GL_IMPORT______(true,  PFNGLBINDFRAMEBUFFERPROC,                   glBindFramebuffer);
+GL_IMPORT______(true,  PFNGLBINDIMAGETEXTUREPROC,                  glBindImageTexture);
 GL_IMPORT______(true,  PFNGLBINDRENDERBUFFERPROC,                  glBindRenderbuffer);
 GL_IMPORT______(true,  PFNGLBINDRENDERBUFFERPROC,                  glBindRenderbuffer);
 GL_IMPORT______(true,  PFNGLBINDSAMPLERPROC,                       glBindSampler);
 GL_IMPORT______(true,  PFNGLBINDSAMPLERPROC,                       glBindSampler);
 GL_IMPORT______(false, PFNGLBINDTEXTUREPROC,                       glBindTexture);
 GL_IMPORT______(false, PFNGLBINDTEXTUREPROC,                       glBindTexture);
@@ -243,6 +257,8 @@ GL_IMPORT______(false, PFNGLDETACHSHADERPROC,                      glDetachShade
 GL_IMPORT______(false, PFNGLDISABLEPROC,                           glDisable);
 GL_IMPORT______(false, PFNGLDISABLEPROC,                           glDisable);
 GL_IMPORT______(true,  PFNGLDISABLEIPROC,                          glDisablei);
 GL_IMPORT______(true,  PFNGLDISABLEIPROC,                          glDisablei);
 GL_IMPORT______(false, PFNGLDISABLEVERTEXATTRIBARRAYPROC,          glDisableVertexAttribArray);
 GL_IMPORT______(false, PFNGLDISABLEVERTEXATTRIBARRAYPROC,          glDisableVertexAttribArray);
+GL_IMPORT______(true,  PFNGLDISPATCHCOMPUTEPROC,                   glDispatchCompute);
+GL_IMPORT______(true,  PFNGLDISPATCHCOMPUTEINDIRECTPROC,           glDispatchComputeIndirect);
 GL_IMPORT______(false, PFNGLDRAWARRAYSPROC,                        glDrawArrays);
 GL_IMPORT______(false, PFNGLDRAWARRAYSPROC,                        glDrawArrays);
 GL_IMPORT______(true,  PFNGLDRAWARRAYSINSTANCEDPROC,               glDrawArraysInstanced);
 GL_IMPORT______(true,  PFNGLDRAWARRAYSINSTANCEDPROC,               glDrawArraysInstanced);
 GL_IMPORT______(true,  PFNGLDRAWBUFFERPROC,                        glDrawBuffer);
 GL_IMPORT______(true,  PFNGLDRAWBUFFERPROC,                        glDrawBuffer);
@@ -275,6 +291,12 @@ GL_IMPORT______(true,  PFNGLGETPOINTERVPROC,                       glGetPointerv
 GL_IMPORT______(true,  PFNGLGETPROGRAMBINARYPROC,                  glGetProgramBinary);
 GL_IMPORT______(true,  PFNGLGETPROGRAMBINARYPROC,                  glGetProgramBinary);
 GL_IMPORT______(false, PFNGLGETPROGRAMIVPROC,                      glGetProgramiv);
 GL_IMPORT______(false, PFNGLGETPROGRAMIVPROC,                      glGetProgramiv);
 GL_IMPORT______(false, PFNGLGETPROGRAMINFOLOGPROC,                 glGetProgramInfoLog);
 GL_IMPORT______(false, PFNGLGETPROGRAMINFOLOGPROC,                 glGetProgramInfoLog);
+GL_IMPORT______(true,  PFNGLGETPROGRAMINTERFACEIVPROC,             glGetProgramInterfaceiv);
+GL_IMPORT______(true,  PFNGLGETPROGRAMRESOURCEINDEXPROC,           glGetProgramResourceIndex);
+GL_IMPORT______(true,  PFNGLGETPROGRAMRESOURCEIVPROC,              glGetProgramResourceiv);
+GL_IMPORT______(true,  PFNGLGETPROGRAMRESOURCENAMEPROC,            glGetProgramResourceName);
+GL_IMPORT______(true,  PFNGLGETPROGRAMRESOURCELOCATIONPROC,        glGetProgramResourceLocation);
+GL_IMPORT______(true,  PFNGLGETPROGRAMRESOURCELOCATIONINDEXPROC,   glGetProgramResourceLocationIndex);
 GL_IMPORT______(true,  PFNGLGETQUERYIVPROC,                        glGetQueryiv);
 GL_IMPORT______(true,  PFNGLGETQUERYIVPROC,                        glGetQueryiv);
 GL_IMPORT______(true,  PFNGLGETQUERYOBJECTIVPROC,                  glGetQueryObjectiv);
 GL_IMPORT______(true,  PFNGLGETQUERYOBJECTIVPROC,                  glGetQueryObjectiv);
 GL_IMPORT______(true,  PFNGLGETQUERYOBJECTI64VPROC,                glGetQueryObjecti64v);
 GL_IMPORT______(true,  PFNGLGETQUERYOBJECTI64VPROC,                glGetQueryObjecti64v);
@@ -285,6 +307,7 @@ GL_IMPORT______(false, PFNGLGETSHADERINFOLOGPROC,                  glGetShaderIn
 GL_IMPORT______(false, PFNGLGETSTRINGPROC,                         glGetString);
 GL_IMPORT______(false, PFNGLGETSTRINGPROC,                         glGetString);
 GL_IMPORT______(false, PFNGLGETUNIFORMLOCATIONPROC,                glGetUniformLocation);
 GL_IMPORT______(false, PFNGLGETUNIFORMLOCATIONPROC,                glGetUniformLocation);
 GL_IMPORT______(false, PFNGLLINKPROGRAMPROC,                       glLinkProgram);
 GL_IMPORT______(false, PFNGLLINKPROGRAMPROC,                       glLinkProgram);
+GL_IMPORT______(true,  PFNGLMEMORYBARRIERPROC,                     glMemoryBarrier);
 GL_IMPORT______(true,  PFNGLOBJECTLABELPROC,                       glObjectLabel);
 GL_IMPORT______(true,  PFNGLOBJECTLABELPROC,                       glObjectLabel);
 GL_IMPORT______(true,  PFNGLOBJECTPTRLABELPROC,                    glObjectPtrLabel);
 GL_IMPORT______(true,  PFNGLOBJECTPTRLABELPROC,                    glObjectPtrLabel);
 GL_IMPORT______(false, PFNGLPIXELSTOREIPROC,                       glPixelStorei);
 GL_IMPORT______(false, PFNGLPIXELSTOREIPROC,                       glPixelStorei);
@@ -430,6 +453,19 @@ GL_IMPORT_____x(true,  PFNGLBINDSAMPLERPROC,                       glBindSampler
 GL_IMPORT_____x(true,  PFNGLSAMPLERPARAMETERFPROC,                 glSamplerParameterf);
 GL_IMPORT_____x(true,  PFNGLSAMPLERPARAMETERFPROC,                 glSamplerParameterf);
 GL_IMPORT_____x(true,  PFNGLSAMPLERPARAMETERIPROC,                 glSamplerParameteri);
 GL_IMPORT_____x(true,  PFNGLSAMPLERPARAMETERIPROC,                 glSamplerParameteri);
 
 
+GL_IMPORT_____x(true,  PFNGLBINDBUFFERBASEPROC,                    glBindBufferBase);
+GL_IMPORT_____x(true,  PFNGLBINDBUFFERRANGEPROC,                   glBindBufferRange);
+GL_IMPORT_____x(true,  PFNGLBINDIMAGETEXTUREPROC,                  glBindImageTexture);
+GL_IMPORT_____x(true,  PFNGLGETPROGRAMINTERFACEIVPROC,             glGetProgramInterfaceiv);
+GL_IMPORT_____x(true,  PFNGLGETPROGRAMRESOURCEINDEXPROC,           glGetProgramResourceIndex);
+GL_IMPORT_____x(true,  PFNGLGETPROGRAMRESOURCEIVPROC,              glGetProgramResourceiv);
+GL_IMPORT_____x(true,  PFNGLGETPROGRAMRESOURCENAMEPROC,            glGetProgramResourceName);
+GL_IMPORT_____x(true,  PFNGLGETPROGRAMRESOURCELOCATIONPROC,        glGetProgramResourceLocation);
+GL_IMPORT_____x(true,  PFNGLGETPROGRAMRESOURCELOCATIONINDEXPROC,   glGetProgramResourceLocationIndex);
+GL_IMPORT_____x(true,  PFNGLMEMORYBARRIERPROC,                     glMemoryBarrier);
+GL_IMPORT_____x(true,  PFNGLDISPATCHCOMPUTEPROC,                   glDispatchCompute);
+GL_IMPORT_____x(true,  PFNGLDISPATCHCOMPUTEINDIRECTPROC,           glDispatchComputeIndirect);
+
 GL_IMPORT_NV___(true,  PFNGLDRAWBUFFERSPROC,                       glDrawBuffers);
 GL_IMPORT_NV___(true,  PFNGLDRAWBUFFERSPROC,                       glDrawBuffers);
 GL_IMPORT_NV___(true,  PFNGLGENQUERIESPROC,                        glGenQueries);
 GL_IMPORT_NV___(true,  PFNGLGENQUERIESPROC,                        glGenQueries);
 GL_IMPORT_NV___(true,  PFNGLDELETEQUERIESPROC,                     glDeleteQueries);
 GL_IMPORT_NV___(true,  PFNGLDELETEQUERIESPROC,                     glDeleteQueries);

+ 4 - 2
src/image.cpp

@@ -10,7 +10,7 @@
 
 
 namespace bgfx
 namespace bgfx
 {
 {
-	static const ImageBlockInfo s_imageBlockInfo[TextureFormat::Count] =
+	static const ImageBlockInfo s_imageBlockInfo[] =
 	{
 	{
 		{  4, 4, 4,  8 }, // BC1
 		{  4, 4, 4,  8 }, // BC1
 		{  8, 4, 4, 16 }, // BC2
 		{  8, 4, 4, 16 }, // BC2
@@ -48,8 +48,9 @@ namespace bgfx
 		{ 32, 1, 1,  4 }, // D32F
 		{ 32, 1, 1,  4 }, // D32F
 		{  8, 1, 1,  1 }, // D0S8
 		{  8, 1, 1,  1 }, // D0S8
 	};
 	};
+	BX_STATIC_ASSERT(TextureFormat::Count == BX_COUNTOF(s_imageBlockInfo) );
 
 
-	static const char* s_textureFormatName[TextureFormat::Count] =
+	static const char* s_textureFormatName[] =
 	{
 	{
 		"BC1",       // BC1
 		"BC1",       // BC1
 		"BC2",       // BC2
 		"BC2",       // BC2
@@ -87,6 +88,7 @@ namespace bgfx
 		"D32F",      // D32F
 		"D32F",      // D32F
 		"D0S8",      // D0S8
 		"D0S8",      // D0S8
 	};
 	};
+	BX_STATIC_ASSERT(TextureFormat::Count == BX_COUNTOF(s_textureFormatName) );
 
 
 	bool isCompressed(TextureFormat::Enum _format)
 	bool isCompressed(TextureFormat::Enum _format)
 	{
 	{

+ 261 - 90
src/renderer_d3d11.cpp

@@ -197,7 +197,7 @@ namespace bgfx
 #	define DXGI_FORMAT_B4G4R4A4_UNORM DXGI_FORMAT(115)
 #	define DXGI_FORMAT_B4G4R4A4_UNORM DXGI_FORMAT(115)
 #endif // DXGI_FORMAT_B4G4R4A4_UNORM
 #endif // DXGI_FORMAT_B4G4R4A4_UNORM
 
 
-	static const TextureFormatInfo s_textureFormat[TextureFormat::Count] =
+	static const TextureFormatInfo s_textureFormat[] =
 	{
 	{
 		{ DXGI_FORMAT_BC1_UNORM,          DXGI_FORMAT_BC1_UNORM,             DXGI_FORMAT_UNKNOWN           }, // BC1 
 		{ DXGI_FORMAT_BC1_UNORM,          DXGI_FORMAT_BC1_UNORM,             DXGI_FORMAT_UNKNOWN           }, // BC1 
 		{ DXGI_FORMAT_BC2_UNORM,          DXGI_FORMAT_BC2_UNORM,             DXGI_FORMAT_UNKNOWN           }, // BC2
 		{ DXGI_FORMAT_BC2_UNORM,          DXGI_FORMAT_BC2_UNORM,             DXGI_FORMAT_UNKNOWN           }, // BC2
@@ -235,6 +235,7 @@ namespace bgfx
 		{ DXGI_FORMAT_R32_TYPELESS,       DXGI_FORMAT_R32_FLOAT,             DXGI_FORMAT_D32_FLOAT         }, // D32F
 		{ DXGI_FORMAT_R32_TYPELESS,       DXGI_FORMAT_R32_FLOAT,             DXGI_FORMAT_D32_FLOAT         }, // D32F
 		{ DXGI_FORMAT_R24G8_TYPELESS,     DXGI_FORMAT_R24_UNORM_X8_TYPELESS, DXGI_FORMAT_D24_UNORM_S8_UINT }, // D0S8
 		{ DXGI_FORMAT_R24G8_TYPELESS,     DXGI_FORMAT_R24_UNORM_X8_TYPELESS, DXGI_FORMAT_D24_UNORM_S8_UINT }, // D0S8
 	};
 	};
+	BX_STATIC_ASSERT(TextureFormat::Count == BX_COUNTOF(s_textureFormat) );
 
 
 	static const D3D11_INPUT_ELEMENT_DESC s_attrib[Attrib::Count] =
 	static const D3D11_INPUT_ELEMENT_DESC s_attrib[Attrib::Count] =
 	{
 	{
@@ -548,6 +549,7 @@ namespace bgfx
 								| BGFX_CAPS_VERTEX_ATTRIB_HALF
 								| BGFX_CAPS_VERTEX_ATTRIB_HALF
 								| BGFX_CAPS_FRAGMENT_DEPTH
 								| BGFX_CAPS_FRAGMENT_DEPTH
 								| BGFX_CAPS_BLEND_INDEPENDENT
 								| BGFX_CAPS_BLEND_INDEPENDENT
+								| BGFX_CAPS_COMPUTE
 								);
 								);
 			g_caps.maxTextureSize   = D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION;
 			g_caps.maxTextureSize   = D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION;
 			g_caps.maxFBAttachments = bx::uint32_min(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS);
 			g_caps.maxFBAttachments = bx::uint32_min(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS);
@@ -677,7 +679,7 @@ namespace bgfx
 
 
 		void createProgram(ProgramHandle _handle, ShaderHandle _vsh, ShaderHandle _fsh) BX_OVERRIDE
 		void createProgram(ProgramHandle _handle, ShaderHandle _vsh, ShaderHandle _fsh) BX_OVERRIDE
 		{
 		{
-			m_program[_handle.idx].create(m_shaders[_vsh.idx], m_shaders[_fsh.idx]);
+			m_program[_handle.idx].create(&m_shaders[_vsh.idx], isValid(_fsh) ? &m_shaders[_fsh.idx] : NULL);
 		}
 		}
 
 
 		void destroyProgram(ProgramHandle _handle) BX_OVERRIDE
 		void destroyProgram(ProgramHandle _handle) BX_OVERRIDE
@@ -847,9 +849,9 @@ namespace bgfx
 
 
 			ProgramD3D11& program = m_program[_blitter.m_program.idx];
 			ProgramD3D11& program = m_program[_blitter.m_program.idx];
 			m_currentProgram = &program;
 			m_currentProgram = &program;
-			deviceCtx->VSSetShader( (ID3D11VertexShader*)program.m_vsh->m_ptr, NULL, 0);
+			deviceCtx->VSSetShader(program.m_vsh->m_vertexShader, NULL, 0);
 			deviceCtx->VSSetConstantBuffers(0, 1, &program.m_vsh->m_buffer);
 			deviceCtx->VSSetConstantBuffers(0, 1, &program.m_vsh->m_buffer);
-			deviceCtx->PSSetShader( (ID3D11PixelShader*)program.m_fsh->m_ptr, NULL, 0);
+			deviceCtx->PSSetShader(program.m_fsh->m_pixelShader, NULL, 0);
 			deviceCtx->PSSetConstantBuffers(0, 1, &program.m_fsh->m_buffer);
 			deviceCtx->PSSetConstantBuffers(0, 1, &program.m_fsh->m_buffer);
 
 
 			VertexBufferD3D11& vb = m_vertexBuffers[_blitter.m_vb->handle.idx];
 			VertexBufferD3D11& vb = m_vertexBuffers[_blitter.m_vb->handle.idx];
@@ -1634,11 +1636,11 @@ namespace bgfx
 
 
 				ProgramD3D11& program = m_program[_clearQuad.m_program[numMrt].idx];
 				ProgramD3D11& program = m_program[_clearQuad.m_program[numMrt].idx];
 				m_currentProgram = &program;
 				m_currentProgram = &program;
-				deviceCtx->VSSetShader( (ID3D11VertexShader*)program.m_vsh->m_ptr, NULL, 0);
+				deviceCtx->VSSetShader(program.m_vsh->m_vertexShader, NULL, 0);
 				deviceCtx->VSSetConstantBuffers(0, 0, NULL);
 				deviceCtx->VSSetConstantBuffers(0, 0, NULL);
 				if (NULL != m_currentColor)
 				if (NULL != m_currentColor)
 				{
 				{
-					deviceCtx->PSSetShader( (ID3D11PixelShader*)program.m_fsh->m_ptr, NULL, 0);
+					deviceCtx->PSSetShader(program.m_fsh->m_pixelShader, NULL, 0);
 					deviceCtx->PSSetConstantBuffers(0, 0, NULL);
 					deviceCtx->PSSetConstantBuffers(0, 0, NULL);
 				}
 				}
 				else
 				else
@@ -1883,6 +1885,7 @@ namespace bgfx
 
 
 		switch (magic)
 		switch (magic)
 		{
 		{
+		case BGFX_CHUNK_MAGIC_CSH:
 		case BGFX_CHUNK_MAGIC_FSH:
 		case BGFX_CHUNK_MAGIC_FSH:
 		case BGFX_CHUNK_MAGIC_VSH:
 		case BGFX_CHUNK_MAGIC_VSH:
 			break;
 			break;
@@ -1903,7 +1906,10 @@ namespace bgfx
 		m_numPredefined = 0;
 		m_numPredefined = 0;
 		m_numUniforms = count;
 		m_numUniforms = count;
 
 
-		BX_TRACE("Shader consts %d", count);
+		BX_TRACE("%s Shader consts %d"
+			, BGFX_CHUNK_MAGIC_FSH == magic ? "Fragment" : BGFX_CHUNK_MAGIC_VSH == magic ? "Vertex" : "Compute"
+			, count
+			);
 
 
 		uint8_t fragmentBit = fragment ? BGFX_UNIFORM_FRAGMENTBIT : 0;
 		uint8_t fragmentBit = fragment ? BGFX_UNIFORM_FRAGMENTBIT : 0;
 
 
@@ -1954,10 +1960,10 @@ namespace bgfx
 					}
 					}
 				}
 				}
 
 
-				BX_TRACE("\t%s: %s, type %2d, num %2d, r.index %3d, r.count %2d"
+				BX_TRACE("\t%s: %s (%s), num %2d, r.index %3d, r.count %2d"
 					, kind
 					, kind
 					, name
 					, name
-					, type
+					, getUniformTypeName(UniformType::Enum(type&~BGFX_UNIFORM_FRAGMENTBIT) )
 					, num
 					, num
 					, regIndex
 					, regIndex
 					, regCount
 					, regCount
@@ -1973,20 +1979,25 @@ namespace bgfx
 		const DWORD* code = (const DWORD*)reader.getDataPtr();
 		const DWORD* code = (const DWORD*)reader.getDataPtr();
 		bx::skip(&reader, shaderSize+1);
 		bx::skip(&reader, shaderSize+1);
 
 
-		if (fragment)
+		if (BGFX_CHUNK_MAGIC_FSH == magic)
 		{
 		{
-			DX_CHECK(s_renderD3D11->m_device->CreatePixelShader(code, shaderSize, NULL, (ID3D11PixelShader**)&m_ptr) );
+			DX_CHECK(s_renderD3D11->m_device->CreatePixelShader(code, shaderSize, NULL, &m_pixelShader) );
 			BGFX_FATAL(NULL != m_ptr, bgfx::Fatal::InvalidShader, "Failed to create fragment shader.");
 			BGFX_FATAL(NULL != m_ptr, bgfx::Fatal::InvalidShader, "Failed to create fragment shader.");
 		}
 		}
-		else
+		else if (BGFX_CHUNK_MAGIC_VSH == magic)
 		{
 		{
 			m_hash = bx::hashMurmur2A(code, shaderSize);
 			m_hash = bx::hashMurmur2A(code, shaderSize);
 			m_code = alloc(shaderSize);
 			m_code = alloc(shaderSize);
 			memcpy(m_code->data, code, shaderSize);
 			memcpy(m_code->data, code, shaderSize);
 
 
-			DX_CHECK(s_renderD3D11->m_device->CreateVertexShader(code, shaderSize, NULL, (ID3D11VertexShader**)&m_ptr) );
+			DX_CHECK(s_renderD3D11->m_device->CreateVertexShader(code, shaderSize, NULL, &m_vertexShader) );
 			BGFX_FATAL(NULL != m_ptr, bgfx::Fatal::InvalidShader, "Failed to create vertex shader.");
 			BGFX_FATAL(NULL != m_ptr, bgfx::Fatal::InvalidShader, "Failed to create vertex shader.");
 		}
 		}
+		else
+		{
+			DX_CHECK(s_renderD3D11->m_device->CreateComputeShader(code, shaderSize, NULL, &m_computeShader) );
+			BGFX_FATAL(NULL != m_ptr, bgfx::Fatal::InvalidShader, "Failed to create compute shader.");
+		}
 
 
 		bx::read(&reader, m_attrMask, sizeof(m_attrMask) );
 		bx::read(&reader, m_attrMask, sizeof(m_attrMask) );
 
 
@@ -2056,8 +2067,9 @@ namespace bgfx
 			uint32_t kk = 0;
 			uint32_t kk = 0;
 
 
 			const bool compressed = isCompressed(TextureFormat::Enum(m_textureFormat) );
 			const bool compressed = isCompressed(TextureFormat::Enum(m_textureFormat) );
+			const bool swizzle    = TextureFormat::BGRA8 == m_textureFormat && 0 != (m_flags&BGFX_TEXTURE_COMPUTE_WRITE);
 
 
-			BX_TRACE("Texture %3d: %s (requested: %s), %dx%d%s%s."
+			BX_TRACE("Texture %3d: %s (requested: %s), %dx%d%s%s%s."
 				, this - s_renderD3D11->m_textures
 				, this - s_renderD3D11->m_textures
 				, getName( (TextureFormat::Enum)m_textureFormat)
 				, getName( (TextureFormat::Enum)m_textureFormat)
 				, getName( (TextureFormat::Enum)m_requestedFormat)
 				, getName( (TextureFormat::Enum)m_requestedFormat)
@@ -2065,6 +2077,7 @@ namespace bgfx
 				, textureHeight
 				, textureHeight
 				, imageContainer.m_cubeMap ? "x6" : ""
 				, imageContainer.m_cubeMap ? "x6" : ""
 				, 0 != (m_flags&BGFX_TEXTURE_RT_MASK) ? " (render target)" : ""
 				, 0 != (m_flags&BGFX_TEXTURE_RT_MASK) ? " (render target)" : ""
+				, swizzle ? " (swizzle BGRA8 -> RGBA8)" : ""
 				);
 				);
 
 
 			for (uint8_t side = 0, numSides = imageContainer.m_cubeMap ? 6 : 1; side < numSides; ++side)
 			for (uint8_t side = 0, numSides = imageContainer.m_cubeMap ? 6 : 1; side < numSides; ++side)
@@ -2095,7 +2108,7 @@ namespace bgfx
 						}
 						}
 						else if (compressed)
 						else if (compressed)
 						{
 						{
-							srd[kk].SysMemPitch = (mip.m_width/blockInfo.blockWidth)*mip.m_blockSize;
+							srd[kk].SysMemPitch      = (mip.m_width /blockInfo.blockWidth )*mip.m_blockSize;
 							srd[kk].SysMemSlicePitch = (mip.m_height/blockInfo.blockHeight)*srd[kk].SysMemPitch;
 							srd[kk].SysMemSlicePitch = (mip.m_height/blockInfo.blockHeight)*srd[kk].SysMemPitch;
 						}
 						}
 						else
 						else
@@ -2103,6 +2116,11 @@ namespace bgfx
 							srd[kk].SysMemPitch = mip.m_width*mip.m_bpp/8;
 							srd[kk].SysMemPitch = mip.m_width*mip.m_bpp/8;
 						}
 						}
 
 
+ 						if (swizzle)
+ 						{
+// 							imageSwizzleBgra8(width, height, mip.m_width*4, data, temp);
+ 						}
+
 						srd[kk].SysMemSlicePitch = mip.m_height*srd[kk].SysMemPitch;
 						srd[kk].SysMemSlicePitch = mip.m_height*srd[kk].SysMemPitch;
 						++kk;
 						++kk;
 					}
 					}
@@ -2113,17 +2131,23 @@ namespace bgfx
 				}
 				}
 			}
 			}
 
 
-			D3D11_SHADER_RESOURCE_VIEW_DESC srvd;
-			memset(&srvd, 0, sizeof(srvd) );
-			srvd.Format = s_textureFormat[m_textureFormat].m_fmtSrv;
-
-			const DXGI_FORMAT format = s_textureFormat[m_textureFormat].m_fmt;
-
 			const bool bufferOnly   = 0 != (m_flags&BGFX_TEXTURE_RT_BUFFER_ONLY);
 			const bool bufferOnly   = 0 != (m_flags&BGFX_TEXTURE_RT_BUFFER_ONLY);
+			const bool computeWrite = 0 != (m_flags&BGFX_TEXTURE_COMPUTE_WRITE);
 			const bool renderTarget = 0 != (m_flags&BGFX_TEXTURE_RT_MASK);
 			const bool renderTarget = 0 != (m_flags&BGFX_TEXTURE_RT_MASK);
 			const uint32_t msaaQuality = bx::uint32_satsub( (m_flags&BGFX_TEXTURE_RT_MSAA_MASK)>>BGFX_TEXTURE_RT_MSAA_SHIFT, 1);
 			const uint32_t msaaQuality = bx::uint32_satsub( (m_flags&BGFX_TEXTURE_RT_MSAA_MASK)>>BGFX_TEXTURE_RT_MSAA_SHIFT, 1);
 			const DXGI_SAMPLE_DESC& msaa = s_msaa[msaaQuality];
 			const DXGI_SAMPLE_DESC& msaa = s_msaa[msaaQuality];
 
 
+			D3D11_SHADER_RESOURCE_VIEW_DESC srvd;
+			memset(&srvd, 0, sizeof(srvd) );
+			srvd.Format = s_textureFormat[m_textureFormat].m_fmtSrv;
+			DXGI_FORMAT format = s_textureFormat[m_textureFormat].m_fmt;
+
+			if (swizzle)
+			{
+				format      = DXGI_FORMAT_R8G8B8A8_UNORM;
+				srvd.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
+			}
+
 			switch (m_type)
 			switch (m_type)
 			{
 			{
 			case Texture2D:
 			case Texture2D:
@@ -2150,6 +2174,12 @@ namespace bgfx
 						desc.Usage = D3D11_USAGE_DEFAULT;
 						desc.Usage = D3D11_USAGE_DEFAULT;
 					}
 					}
 
 
+					if (computeWrite)
+					{
+						desc.BindFlags |= D3D11_BIND_UNORDERED_ACCESS;
+						desc.Usage = D3D11_USAGE_DEFAULT;
+					}
+
 					if (imageContainer.m_cubeMap)
 					if (imageContainer.m_cubeMap)
 					{
 					{
 						desc.ArraySize = 6;
 						desc.ArraySize = 6;
@@ -2182,6 +2212,12 @@ namespace bgfx
 					desc.CPUAccessFlags = 0;
 					desc.CPUAccessFlags = 0;
 					desc.MiscFlags = 0;
 					desc.MiscFlags = 0;
 
 
+					if (computeWrite)
+					{
+						desc.BindFlags |= D3D11_BIND_UNORDERED_ACCESS;
+						desc.Usage = D3D11_USAGE_DEFAULT;
+					}
+
 					srvd.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D;
 					srvd.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D;
 					srvd.Texture3D.MipLevels = numMips;
 					srvd.Texture3D.MipLevels = numMips;
 
 
@@ -2195,6 +2231,11 @@ namespace bgfx
 				DX_CHECK(s_renderD3D11->m_device->CreateShaderResourceView(m_ptr, &srvd, &m_srv) );
 				DX_CHECK(s_renderD3D11->m_device->CreateShaderResourceView(m_ptr, &srvd, &m_srv) );
 			}
 			}
 
 
+			if (computeWrite)
+			{
+				DX_CHECK(s_renderD3D11->m_device->CreateUnorderedAccessView(m_ptr, NULL, &m_uav) );
+			}
+
 			if (convert
 			if (convert
 			&&  0 != kk)
 			&&  0 != kk)
 			{
 			{
@@ -2214,6 +2255,7 @@ namespace bgfx
 	void TextureD3D11::destroy()
 	void TextureD3D11::destroy()
 	{
 	{
 		DX_RELEASE(m_srv, 0);
 		DX_RELEASE(m_srv, 0);
+		DX_RELEASE(m_uav, 0);
 		DX_RELEASE(m_ptr, 0);
 		DX_RELEASE(m_ptr, 0);
 	}
 	}
 
 
@@ -2376,8 +2418,8 @@ namespace bgfx
 
 
 		_render->sort();
 		_render->sort();
 
 
-		RenderState currentState;
-		currentState.reset();
+		RenderDraw currentState;
+		currentState.clear();
 		currentState.m_flags = BGFX_STATE_NONE;
 		currentState.m_flags = BGFX_STATE_NONE;
 		currentState.m_stencil = packStencil(BGFX_STENCIL_NONE, BGFX_STENCIL_NONE);
 		currentState.m_stencil = packStencil(BGFX_STENCIL_NONE, BGFX_STENCIL_NONE);
 
 
@@ -2409,6 +2451,7 @@ namespace bgfx
 		PrimInfo prim = s_primInfo[primIndex];
 		PrimInfo prim = s_primInfo[primIndex];
 		deviceCtx->IASetPrimitiveTopology(prim.m_type);
 		deviceCtx->IASetPrimitiveTopology(prim.m_type);
 
 
+		bool wasCompute = false;
 		bool viewHasScissor = false;
 		bool viewHasScissor = false;
 		Rect viewScissorRect;
 		Rect viewScissorRect;
 		viewScissorRect.clear();
 		viewScissorRect.clear();
@@ -2422,26 +2465,13 @@ namespace bgfx
 		{
 		{
 			for (uint32_t item = 0, numItems = _render->m_num; item < numItems; ++item)
 			for (uint32_t item = 0, numItems = _render->m_num; item < numItems; ++item)
 			{
 			{
-				key.decode(_render->m_sortKeys[item]);
-				const RenderState& state = _render->m_renderState[_render->m_sortValues[item] ];
+				const bool isCompute = key.decode(_render->m_sortKeys[item]);
+				const bool viewChanged = key.m_view != view;
 
 
-				const uint64_t newFlags = state.m_flags;
-				uint64_t changedFlags = currentState.m_flags ^ state.m_flags;
-				currentState.m_flags = newFlags;
+				const RenderItem& renderItem = _render->m_renderItem[_render->m_sortValues[item] ];
 
 
-				const uint64_t newStencil = state.m_stencil;
-				uint64_t changedStencil = currentState.m_stencil ^ state.m_stencil;
-				currentState.m_stencil = newStencil;
-
-				if (key.m_view != view)
+				if (viewChanged)
 				{
 				{
-					currentState.clear();
-					currentState.m_scissor = !state.m_scissor;
-					changedFlags = BGFX_STATE_MASK;
-					changedStencil = packStencil(BGFX_STENCIL_MASK, BGFX_STENCIL_MASK);
-					currentState.m_flags = newFlags;
-					currentState.m_stencil = newStencil;
-
 					PIX_ENDEVENT();
 					PIX_ENDEVENT();
 					PIX_BEGINEVENT(D3DCOLOR_RGBA(0xff, 0x00, 0x00, 0xff), s_viewNameW[key.m_view]);
 					PIX_BEGINEVENT(D3DCOLOR_RGBA(0xff, 0x00, 0x00, 0xff), s_viewNameW[key.m_view]);
 
 
@@ -2473,7 +2503,147 @@ namespace bgfx
 					{
 					{
 						clearQuad(_clearQuad, rect, clear);
 						clearQuad(_clearQuad, rect, clear);
 					}
 					}
+				}
+
+				if (isCompute)
+				{
+					if (!wasCompute)
+					{
+						wasCompute = true;
+
+						ID3D11ShaderResourceView* srv[BGFX_STATE_TEX_COUNT] = {};
+						deviceCtx->VSSetShaderResources(0, BGFX_STATE_TEX_COUNT, srv);
+						deviceCtx->PSSetShaderResources(0, BGFX_STATE_TEX_COUNT, srv);
+
+						ID3D11SamplerState* sampler[BGFX_STATE_TEX_COUNT] = {};
+						deviceCtx->VSSetSamplers(0, BGFX_STATE_TEX_COUNT, sampler);
+						deviceCtx->PSSetSamplers(0, BGFX_STATE_TEX_COUNT, sampler);
+					}
+
+					const RenderCompute& compute = renderItem.compute;
+
+					bool programChanged = false;
+					bool constantsChanged = compute.m_constBegin < compute.m_constEnd;
+					rendererUpdateUniforms(this, _render->m_constantBuffer, compute.m_constBegin, compute.m_constEnd);
+
+					if (key.m_program != programIdx)
+					{
+						programIdx = key.m_program;
+
+						ProgramD3D11& program = m_program[key.m_program];
+						m_currentProgram = &program;
+
+						deviceCtx->CSSetShader(program.m_vsh->m_computeShader, NULL, 0);
+						deviceCtx->CSSetConstantBuffers(0, 1, &program.m_vsh->m_buffer);
+
+						programChanged = 
+							constantsChanged = true;
+					}
+
+					if (invalidHandle != programIdx)
+					{
+						ProgramD3D11& program = m_program[programIdx];
+
+						if (constantsChanged)
+						{
+							ConstantBuffer* vcb = program.m_vsh->m_constantBuffer;
+							if (NULL != vcb)
+							{
+								commit(*vcb);
+							}
+						}
+
+						if (constantsChanged
+						||  program.m_numPredefined > 0)
+						{
+							commitShaderConstants();
+						}
+					}
+
+					ID3D11UnorderedAccessView* uav[BGFX_MAX_COMPUTE_BINDINGS] = {};
+					ID3D11ShaderResourceView*  srv[BGFX_MAX_COMPUTE_BINDINGS] = {};
+					ID3D11SamplerState*    sampler[BGFX_STATE_TEX_COUNT] = {};
+
+					for (uint32_t ii = 0; ii < BGFX_MAX_COMPUTE_BINDINGS; ++ii)
+					{
+						const ComputeBinding& bind = compute.m_bind[ii];
+						if (invalidHandle != bind.m_idx)
+						{
+							switch (bind.m_type)
+							{
+							case ComputeBinding::Image:
+								{
+									const TextureD3D11& texture = m_textures[bind.m_idx];
+									if (Access::Read != bind.m_access)
+									{
+										uav[ii] = texture.m_uav;
+									}
+									else
+									{
+										srv[ii] = texture.m_srv;
+										sampler[ii] = texture.m_sampler;
+									}
+								}
+								break;
+
+							case ComputeBinding::Buffer:
+								{
+									const VertexBufferD3D11& vertexBuffer = m_vertexBuffers[bind.m_idx];
+									BX_UNUSED(vertexBuffer);
+								}
+								break;
+							}
+						}
+					}
+
+					deviceCtx->CSSetUnorderedAccessViews(0, BGFX_MAX_COMPUTE_BINDINGS, uav, NULL);
+					deviceCtx->CSSetShaderResources(0, BGFX_MAX_COMPUTE_BINDINGS, srv);
+					deviceCtx->CSSetSamplers(0, BGFX_MAX_COMPUTE_BINDINGS, sampler);
+
+					deviceCtx->Dispatch(compute.m_numX, compute.m_numY, compute.m_numZ);
+
+					continue;
+				}
+
+				if (wasCompute)
+				{
+					wasCompute = false;
+
+					programIdx = invalidHandle;
+					m_currentProgram = NULL;
+
+					deviceCtx->CSSetShader(NULL, NULL, 0);
+
+					ID3D11UnorderedAccessView* uav[BGFX_STATE_TEX_COUNT] = {};
+					deviceCtx->CSSetUnorderedAccessViews(0, BGFX_STATE_TEX_COUNT, uav, NULL);
+
+					ID3D11ShaderResourceView* srv[BGFX_STATE_TEX_COUNT] = {};
+					deviceCtx->CSSetShaderResources(0, BGFX_STATE_TEX_COUNT, srv);
+
+					ID3D11SamplerState* samplers[BGFX_STATE_TEX_COUNT] = {};
+					m_deviceCtx->CSSetSamplers(0, BGFX_STATE_TEX_COUNT, samplers);
+				}
+
+				const RenderDraw& draw = renderItem.draw;
+
+				const uint64_t newFlags = draw.m_flags;
+				uint64_t changedFlags = currentState.m_flags ^ draw.m_flags;
+				currentState.m_flags = newFlags;
+
+				const uint64_t newStencil = draw.m_stencil;
+				uint64_t changedStencil = currentState.m_stencil ^ draw.m_stencil;
+				currentState.m_stencil = newStencil;
+
+				if (viewChanged)
+				{
+					currentState.clear();
+					currentState.m_scissor = !draw.m_scissor;
+					changedFlags = BGFX_STATE_MASK;
+					changedStencil = packStencil(BGFX_STENCIL_MASK, BGFX_STENCIL_MASK);
+					currentState.m_flags = newFlags;
+					currentState.m_stencil = newStencil;
 
 
+					uint64_t newFlags = renderItem.draw.m_flags;
 					setBlendState(newFlags);
 					setBlendState(newFlags);
 					setDepthStencilState(newFlags, packStencil(BGFX_STENCIL_DEFAULT, BGFX_STENCIL_DEFAULT) );
 					setDepthStencilState(newFlags, packStencil(BGFX_STENCIL_DEFAULT, BGFX_STENCIL_DEFAULT) );
 
 
@@ -2486,7 +2656,7 @@ namespace bgfx
 					}
 					}
 				}
 				}
 
 
-				uint16_t scissor = state.m_scissor;
+				uint16_t scissor = draw.m_scissor;
 				if (currentState.m_scissor != scissor)
 				if (currentState.m_scissor != scissor)
 				{
 				{
 					currentState.m_scissor = scissor;
 					currentState.m_scissor = scissor;
@@ -2540,7 +2710,7 @@ namespace bgfx
 				{
 				{
 					if ( (BGFX_STATE_BLEND_MASK|BGFX_STATE_BLEND_EQUATION_MASK|BGFX_STATE_ALPHA_WRITE|BGFX_STATE_RGB_WRITE) & changedFlags)
 					if ( (BGFX_STATE_BLEND_MASK|BGFX_STATE_BLEND_EQUATION_MASK|BGFX_STATE_ALPHA_WRITE|BGFX_STATE_RGB_WRITE) & changedFlags)
 					{
 					{
-						setBlendState(newFlags, state.m_rgba);
+						setBlendState(newFlags, draw.m_rgba);
 					}
 					}
 
 
 					if ( (BGFX_STATE_CULL_MASK|BGFX_STATE_MSAA) & changedFlags)
 					if ( (BGFX_STATE_CULL_MASK|BGFX_STATE_MSAA) & changedFlags)
@@ -2564,8 +2734,8 @@ namespace bgfx
 				}
 				}
 
 
 				bool programChanged = false;
 				bool programChanged = false;
-				bool constantsChanged = state.m_constBegin < state.m_constEnd;
-				rendererUpdateUniforms(this, _render->m_constantBuffer, state.m_constBegin, state.m_constEnd);
+				bool constantsChanged = draw.m_constBegin < draw.m_constEnd;
+				rendererUpdateUniforms(this, _render->m_constantBuffer, draw.m_constBegin, draw.m_constEnd);
 
 
 				if (key.m_program != programIdx)
 				if (key.m_program != programIdx)
 				{
 				{
@@ -2583,13 +2753,14 @@ namespace bgfx
 						ProgramD3D11& program = m_program[programIdx];
 						ProgramD3D11& program = m_program[programIdx];
 						m_currentProgram = &program;
 						m_currentProgram = &program;
 
 
-						deviceCtx->VSSetShader( (ID3D11VertexShader*)program.m_vsh->m_ptr, NULL, 0);
-						deviceCtx->VSSetConstantBuffers(0, 1, &program.m_vsh->m_buffer);
+						const ShaderD3D11* vsh = program.m_vsh;
+						deviceCtx->VSSetShader(vsh->m_vertexShader, NULL, 0);
+						deviceCtx->VSSetConstantBuffers(0, 1, &vsh->m_buffer);
 
 
 						if (NULL != m_currentColor)
 						if (NULL != m_currentColor)
 						{
 						{
 							const ShaderD3D11* fsh = program.m_fsh;
 							const ShaderD3D11* fsh = program.m_fsh;
-							deviceCtx->PSSetShader( (ID3D11PixelShader*)fsh->m_ptr, NULL, 0);
+							deviceCtx->PSSetShader(fsh->m_pixelShader, NULL, 0);
 							deviceCtx->PSSetConstantBuffers(0, 1, &fsh->m_buffer);
 							deviceCtx->PSSetConstantBuffers(0, 1, &fsh->m_buffer);
 						}
 						}
 						else
 						else
@@ -2705,15 +2876,15 @@ namespace bgfx
 
 
 						case PredefinedUniform::Model:
 						case PredefinedUniform::Model:
 							{
 							{
-								const Matrix4& model = _render->m_matrixCache.m_cache[state.m_matrix];
-								setShaderConstant(flags, predefined.m_loc, model.un.val, bx::uint32_min(state.m_num*4, predefined.m_count) );
+								const Matrix4& model = _render->m_matrixCache.m_cache[draw.m_matrix];
+								setShaderConstant(flags, predefined.m_loc, model.un.val, bx::uint32_min(draw.m_num*4, predefined.m_count) );
 							}
 							}
 							break;
 							break;
 
 
 						case PredefinedUniform::ModelView:
 						case PredefinedUniform::ModelView:
 							{
 							{
 								Matrix4 modelView;
 								Matrix4 modelView;
-								const Matrix4& model = _render->m_matrixCache.m_cache[state.m_matrix];
+								const Matrix4& model = _render->m_matrixCache.m_cache[draw.m_matrix];
 								bx::float4x4_mul(&modelView.un.f4x4, &model.un.f4x4, &_render->m_view[view].un.f4x4);
 								bx::float4x4_mul(&modelView.un.f4x4, &model.un.f4x4, &_render->m_view[view].un.f4x4);
 								setShaderConstant(flags, predefined.m_loc, modelView.un.val, bx::uint32_min(4, predefined.m_count) );
 								setShaderConstant(flags, predefined.m_loc, modelView.un.val, bx::uint32_min(4, predefined.m_count) );
 							}
 							}
@@ -2722,7 +2893,7 @@ namespace bgfx
 						case PredefinedUniform::ModelViewProj:
 						case PredefinedUniform::ModelViewProj:
 							{
 							{
 								Matrix4 modelViewProj;
 								Matrix4 modelViewProj;
-								const Matrix4& model = _render->m_matrixCache.m_cache[state.m_matrix];
+								const Matrix4& model = _render->m_matrixCache.m_cache[draw.m_matrix];
 								bx::float4x4_mul(&modelViewProj.un.f4x4, &model.un.f4x4, &viewProj[view].un.f4x4);
 								bx::float4x4_mul(&modelViewProj.un.f4x4, &model.un.f4x4, &viewProj[view].un.f4x4);
 								setShaderConstant(flags, predefined.m_loc, modelViewProj.un.val, bx::uint32_min(4, predefined.m_count) );
 								setShaderConstant(flags, predefined.m_loc, modelViewProj.un.val, bx::uint32_min(4, predefined.m_count) );
 							}
 							}
@@ -2753,7 +2924,7 @@ namespace bgfx
 					uint64_t flag = BGFX_STATE_TEX0;
 					uint64_t flag = BGFX_STATE_TEX0;
 					for (uint32_t stage = 0; stage < BGFX_STATE_TEX_COUNT; ++stage)
 					for (uint32_t stage = 0; stage < BGFX_STATE_TEX_COUNT; ++stage)
 					{
 					{
-						const Sampler& sampler = state.m_sampler[stage];
+						const Sampler& sampler = draw.m_sampler[stage];
 						Sampler& current = currentState.m_sampler[stage];
 						Sampler& current = currentState.m_sampler[stage];
 						if (current.m_idx != sampler.m_idx
 						if (current.m_idx != sampler.m_idx
 						||  current.m_flags != sampler.m_flags
 						||  current.m_flags != sampler.m_flags
@@ -2784,33 +2955,33 @@ namespace bgfx
 				}
 				}
 
 
 				if (programChanged
 				if (programChanged
-				||  currentState.m_vertexBuffer.idx != state.m_vertexBuffer.idx
-				||  currentState.m_instanceDataBuffer.idx != state.m_instanceDataBuffer.idx
-				||  currentState.m_instanceDataOffset != state.m_instanceDataOffset
-				||  currentState.m_instanceDataStride != state.m_instanceDataStride)
+				||  currentState.m_vertexBuffer.idx != draw.m_vertexBuffer.idx
+				||  currentState.m_instanceDataBuffer.idx != draw.m_instanceDataBuffer.idx
+				||  currentState.m_instanceDataOffset != draw.m_instanceDataOffset
+				||  currentState.m_instanceDataStride != draw.m_instanceDataStride)
 				{
 				{
-					currentState.m_vertexBuffer = state.m_vertexBuffer;
-					currentState.m_instanceDataBuffer.idx = state.m_instanceDataBuffer.idx;
-					currentState.m_instanceDataOffset = state.m_instanceDataOffset;
-					currentState.m_instanceDataStride = state.m_instanceDataStride;
+					currentState.m_vertexBuffer = draw.m_vertexBuffer;
+					currentState.m_instanceDataBuffer.idx = draw.m_instanceDataBuffer.idx;
+					currentState.m_instanceDataOffset = draw.m_instanceDataOffset;
+					currentState.m_instanceDataStride = draw.m_instanceDataStride;
 
 
-					uint16_t handle = state.m_vertexBuffer.idx;
+					uint16_t handle = draw.m_vertexBuffer.idx;
 					if (invalidHandle != handle)
 					if (invalidHandle != handle)
 					{
 					{
 						const VertexBufferD3D11& vb = m_vertexBuffers[handle];
 						const VertexBufferD3D11& vb = m_vertexBuffers[handle];
 
 
-						uint16_t decl = !isValid(vb.m_decl) ? state.m_vertexDecl.idx : vb.m_decl.idx;
+						uint16_t decl = !isValid(vb.m_decl) ? draw.m_vertexDecl.idx : vb.m_decl.idx;
 						const VertexDecl& vertexDecl = m_vertexDecls[decl];
 						const VertexDecl& vertexDecl = m_vertexDecls[decl];
 						uint32_t stride = vertexDecl.m_stride;
 						uint32_t stride = vertexDecl.m_stride;
 						uint32_t offset = 0;
 						uint32_t offset = 0;
 						deviceCtx->IASetVertexBuffers(0, 1, &vb.m_ptr, &stride, &offset);
 						deviceCtx->IASetVertexBuffers(0, 1, &vb.m_ptr, &stride, &offset);
 
 
-						if (isValid(state.m_instanceDataBuffer) )
+						if (isValid(draw.m_instanceDataBuffer) )
 						{
 						{
- 							const VertexBufferD3D11& inst = m_vertexBuffers[state.m_instanceDataBuffer.idx];
-							uint32_t instStride = state.m_instanceDataStride;
-							deviceCtx->IASetVertexBuffers(1, 1, &inst.m_ptr, &instStride, &state.m_instanceDataOffset);
-							setInputLayout(vertexDecl, m_program[programIdx], state.m_instanceDataStride/16);
+ 							const VertexBufferD3D11& inst = m_vertexBuffers[draw.m_instanceDataBuffer.idx];
+							uint32_t instStride = draw.m_instanceDataStride;
+							deviceCtx->IASetVertexBuffers(1, 1, &inst.m_ptr, &instStride, &draw.m_instanceDataOffset);
+							setInputLayout(vertexDecl, m_program[programIdx], draw.m_instanceDataStride/16);
 						}
 						}
 						else
 						else
 						{
 						{
@@ -2824,11 +2995,11 @@ namespace bgfx
 					}
 					}
 				}
 				}
 
 
-				if (currentState.m_indexBuffer.idx != state.m_indexBuffer.idx)
+				if (currentState.m_indexBuffer.idx != draw.m_indexBuffer.idx)
 				{
 				{
-					currentState.m_indexBuffer = state.m_indexBuffer;
+					currentState.m_indexBuffer = draw.m_indexBuffer;
 
 
-					uint16_t handle = state.m_indexBuffer.idx;
+					uint16_t handle = draw.m_indexBuffer.idx;
 					if (invalidHandle != handle)
 					if (invalidHandle != handle)
 					{
 					{
 						const IndexBufferD3D11& ib = m_indexBuffers[handle];
 						const IndexBufferD3D11& ib = m_indexBuffers[handle];
@@ -2842,11 +3013,11 @@ namespace bgfx
 
 
 				if (isValid(currentState.m_vertexBuffer) )
 				if (isValid(currentState.m_vertexBuffer) )
 				{
 				{
-					uint32_t numVertices = state.m_numVertices;
+					uint32_t numVertices = draw.m_numVertices;
 					if (UINT32_MAX == numVertices)
 					if (UINT32_MAX == numVertices)
 					{
 					{
 						const VertexBufferD3D11& vb = m_vertexBuffers[currentState.m_vertexBuffer.idx];
 						const VertexBufferD3D11& vb = m_vertexBuffers[currentState.m_vertexBuffer.idx];
-						uint16_t decl = !isValid(vb.m_decl) ? state.m_vertexDecl.idx : vb.m_decl.idx;
+						uint16_t decl = !isValid(vb.m_decl) ? draw.m_vertexDecl.idx : vb.m_decl.idx;
 						const VertexDecl& vertexDecl = m_vertexDecls[decl];
 						const VertexDecl& vertexDecl = m_vertexDecls[decl];
 						numVertices = vb.m_size/vertexDecl.m_stride;
 						numVertices = vb.m_size/vertexDecl.m_stride;
 					}
 					}
@@ -2856,33 +3027,33 @@ namespace bgfx
 					uint32_t numInstances = 0;
 					uint32_t numInstances = 0;
 					uint32_t numPrimsRendered = 0;
 					uint32_t numPrimsRendered = 0;
 
 
-					if (isValid(state.m_indexBuffer) )
+					if (isValid(draw.m_indexBuffer) )
 					{
 					{
-						if (UINT32_MAX == state.m_numIndices)
+						if (UINT32_MAX == draw.m_numIndices)
 						{
 						{
-							numIndices = m_indexBuffers[state.m_indexBuffer.idx].m_size/2;
+							numIndices = m_indexBuffers[draw.m_indexBuffer.idx].m_size/2;
 							numPrimsSubmitted = numIndices/prim.m_div - prim.m_sub;
 							numPrimsSubmitted = numIndices/prim.m_div - prim.m_sub;
-							numInstances = state.m_numInstances;
-							numPrimsRendered = numPrimsSubmitted*state.m_numInstances;
+							numInstances = draw.m_numInstances;
+							numPrimsRendered = numPrimsSubmitted*draw.m_numInstances;
 
 
 							deviceCtx->DrawIndexedInstanced(numIndices
 							deviceCtx->DrawIndexedInstanced(numIndices
-								, state.m_numInstances
+								, draw.m_numInstances
 								, 0
 								, 0
-								, state.m_startVertex
+								, draw.m_startVertex
 								, 0
 								, 0
 								);
 								);
 						}
 						}
-						else if (prim.m_min <= state.m_numIndices)
+						else if (prim.m_min <= draw.m_numIndices)
 						{
 						{
-							numIndices = state.m_numIndices;
+							numIndices = draw.m_numIndices;
 							numPrimsSubmitted = numIndices/prim.m_div - prim.m_sub;
 							numPrimsSubmitted = numIndices/prim.m_div - prim.m_sub;
-							numInstances = state.m_numInstances;
-							numPrimsRendered = numPrimsSubmitted*state.m_numInstances;
+							numInstances = draw.m_numInstances;
+							numPrimsRendered = numPrimsSubmitted*draw.m_numInstances;
 
 
 							deviceCtx->DrawIndexedInstanced(numIndices
 							deviceCtx->DrawIndexedInstanced(numIndices
-								, state.m_numInstances
-								, state.m_startIndex
-								, state.m_startVertex
+								, draw.m_numInstances
+								, draw.m_startIndex
+								, draw.m_startVertex
 								, 0
 								, 0
 								);
 								);
 						}
 						}
@@ -2890,12 +3061,12 @@ namespace bgfx
 					else
 					else
 					{
 					{
 						numPrimsSubmitted = numVertices/prim.m_div - prim.m_sub;
 						numPrimsSubmitted = numVertices/prim.m_div - prim.m_sub;
-						numInstances = state.m_numInstances;
-						numPrimsRendered = numPrimsSubmitted*state.m_numInstances;
+						numInstances = draw.m_numInstances;
+						numPrimsRendered = numPrimsSubmitted*draw.m_numInstances;
 
 
 						deviceCtx->DrawInstanced(numVertices
 						deviceCtx->DrawInstanced(numVertices
-							, state.m_numInstances
-							, state.m_startVertex
+							, draw.m_numInstances
+							, draw.m_startVertex
 							, 0
 							, 0
 							);
 							);
 					}
 					}

+ 22 - 11
src/renderer_d3d11.h

@@ -156,7 +156,13 @@ namespace bgfx
 			}
 			}
 		}
 		}
 
 
-		IUnknown* m_ptr;
+		union
+		{
+			ID3D11ComputeShader* m_computeShader;
+			ID3D11PixelShader*   m_pixelShader;
+			ID3D11VertexShader*  m_vertexShader;
+			IUnknown*            m_ptr;
+		};
 		const Memory* m_code;
 		const Memory* m_code;
 		ID3D11Buffer* m_buffer;
 		ID3D11Buffer* m_buffer;
 		ConstantBuffer* m_constantBuffer;
 		ConstantBuffer* m_constantBuffer;
@@ -178,17 +184,20 @@ namespace bgfx
 		{
 		{
 		}
 		}
 
 
-		void create(const ShaderD3D11& _vsh, const ShaderD3D11& _fsh)
+		void create(const ShaderD3D11* _vsh, const ShaderD3D11* _fsh)
 		{
 		{
-			BX_CHECK(NULL != _vsh.m_ptr, "Vertex shader doesn't exist.");
-			m_vsh = &_vsh;
-			memcpy(&m_predefined[0], _vsh.m_predefined, _vsh.m_numPredefined*sizeof(PredefinedUniform) );
-			m_numPredefined = _vsh.m_numPredefined;
-
-			BX_CHECK(NULL != _fsh.m_ptr, "Fragment shader doesn't exist.");
-			m_fsh = &_fsh;
-			memcpy(&m_predefined[m_numPredefined], _fsh.m_predefined, _fsh.m_numPredefined*sizeof(PredefinedUniform) );
-			m_numPredefined += _fsh.m_numPredefined;
+			BX_CHECK(NULL != _vsh->m_ptr, "Vertex shader doesn't exist.");
+			m_vsh = _vsh;
+			memcpy(&m_predefined[0], _vsh->m_predefined, _vsh->m_numPredefined*sizeof(PredefinedUniform) );
+			m_numPredefined = _vsh->m_numPredefined;
+
+			if (NULL != _fsh)
+			{
+				BX_CHECK(NULL != _fsh->m_ptr, "Fragment shader doesn't exist.");
+				m_fsh = _fsh;
+				memcpy(&m_predefined[m_numPredefined], _fsh->m_predefined, _fsh->m_numPredefined*sizeof(PredefinedUniform) );
+				m_numPredefined += _fsh->m_numPredefined;
+			}
 		}
 		}
 
 
 		void destroy()
 		void destroy()
@@ -217,6 +226,7 @@ namespace bgfx
 		TextureD3D11()
 		TextureD3D11()
 			: m_ptr(NULL)
 			: m_ptr(NULL)
 			, m_srv(NULL)
 			, m_srv(NULL)
+			, m_uav(NULL)
 			, m_sampler(NULL)
 			, m_sampler(NULL)
 			, m_numMips(0)
 			, m_numMips(0)
 		{
 		{
@@ -236,6 +246,7 @@ namespace bgfx
 		};
 		};
 
 
 		ID3D11ShaderResourceView* m_srv;
 		ID3D11ShaderResourceView* m_srv;
+		ID3D11UnorderedAccessView* m_uav;
 		ID3D11SamplerState* m_sampler;
 		ID3D11SamplerState* m_sampler;
 		uint32_t m_flags;
 		uint32_t m_flags;
 		uint8_t m_type;
 		uint8_t m_type;

+ 75 - 65
src/renderer_d3d9.cpp

@@ -183,7 +183,7 @@ namespace bgfx
 		D3DFORMAT m_fmt;
 		D3DFORMAT m_fmt;
 	};
 	};
 
 
-	static TextureFormatInfo s_textureFormat[TextureFormat::Count] =
+	static TextureFormatInfo s_textureFormat[] =
 	{
 	{
 		{ D3DFMT_DXT1          }, // BC1 
 		{ D3DFMT_DXT1          }, // BC1 
 		{ D3DFMT_DXT3          }, // BC2
 		{ D3DFMT_DXT3          }, // BC2
@@ -225,6 +225,7 @@ namespace bgfx
 		{ D3DFMT_INTZ /*D3DFMT_S8_LOCKABLE*/   }, // D0S8
 		{ D3DFMT_INTZ /*D3DFMT_S8_LOCKABLE*/   }, // D0S8
 #endif // defined(D3D_DISABLE_9EX)
 #endif // defined(D3D_DISABLE_9EX)
 	};
 	};
+	BX_STATIC_ASSERT(TextureFormat::Count == BX_COUNTOF(s_textureFormat) );
 
 
 	static ExtendedFormat s_extendedFormats[ExtendedFormat::Count] =
 	static ExtendedFormat s_extendedFormats[ExtendedFormat::Count] =
 	{
 	{
@@ -822,8 +823,8 @@ namespace bgfx
 			DX_CHECK(device->SetRenderState(D3DRS_FILLMODE, D3DFILL_SOLID) );
 			DX_CHECK(device->SetRenderState(D3DRS_FILLMODE, D3DFILL_SOLID) );
 
 
 			ProgramD3D9& program = m_program[_blitter.m_program.idx];
 			ProgramD3D9& program = m_program[_blitter.m_program.idx];
-			DX_CHECK(device->SetVertexShader( (IDirect3DVertexShader9*)program.m_vsh->m_ptr) );
-			DX_CHECK(device->SetPixelShader( (IDirect3DPixelShader9*)program.m_fsh->m_ptr) );
+			DX_CHECK(device->SetVertexShader(program.m_vsh->m_vertexShader) );
+			DX_CHECK(device->SetPixelShader(program.m_fsh->m_pixelShader) );
 
 
 			VertexBufferD3D9& vb = m_vertexBuffers[_blitter.m_vb->handle.idx];
 			VertexBufferD3D9& vb = m_vertexBuffers[_blitter.m_vb->handle.idx];
 			VertexDeclaration& vertexDecl = m_vertexDecls[_blitter.m_vb->decl.idx];
 			VertexDeclaration& vertexDecl = m_vertexDecls[_blitter.m_vb->decl.idx];
@@ -1735,13 +1736,15 @@ namespace bgfx
 
 
 		if (fragment)
 		if (fragment)
 		{
 		{
-			DX_CHECK(s_renderD3D9->m_device->CreatePixelShader(code, (IDirect3DPixelShader9**)&m_ptr) );
-			BGFX_FATAL(NULL != m_ptr, bgfx::Fatal::InvalidShader, "Failed to create fragment shader.");
+			m_type = 1;
+			DX_CHECK(s_renderD3D9->m_device->CreatePixelShader(code, &m_pixelShader) );
+			BGFX_FATAL(NULL != m_pixelShader, bgfx::Fatal::InvalidShader, "Failed to create fragment shader.");
 		}
 		}
 		else
 		else
 		{
 		{
-			DX_CHECK(s_renderD3D9->m_device->CreateVertexShader(code, (IDirect3DVertexShader9**)&m_ptr) );
-			BGFX_FATAL(NULL != m_ptr, bgfx::Fatal::InvalidShader, "Failed to create vertex shader.");
+			m_type = 0;
+			DX_CHECK(s_renderD3D9->m_device->CreateVertexShader(code, &m_vertexShader) );
+			BGFX_FATAL(NULL != m_vertexShader, bgfx::Fatal::InvalidShader, "Failed to create vertex shader.");
 		}
 		}
 	}
 	}
 
 
@@ -2451,8 +2454,8 @@ namespace bgfx
 
 
 		_render->sort();
 		_render->sort();
 
 
-		RenderState currentState;
-		currentState.reset();
+		RenderDraw currentState;
+		currentState.clear();
 		currentState.m_flags = BGFX_STATE_NONE;
 		currentState.m_flags = BGFX_STATE_NONE;
 		currentState.m_stencil = packStencil(BGFX_STENCIL_NONE, BGFX_STENCIL_NONE);
 		currentState.m_stencil = packStencil(BGFX_STENCIL_NONE, BGFX_STENCIL_NONE);
 
 
@@ -2496,21 +2499,28 @@ namespace bgfx
 		{
 		{
 			for (uint32_t item = 0, numItems = _render->m_num; item < numItems; ++item)
 			for (uint32_t item = 0, numItems = _render->m_num; item < numItems; ++item)
 			{
 			{
-				key.decode(_render->m_sortKeys[item]);
-				const RenderState& state = _render->m_renderState[_render->m_sortValues[item] ];
+				const bool isCompute = key.decode(_render->m_sortKeys[item]);
 
 
-				const uint64_t newFlags = state.m_flags;
-				uint64_t changedFlags = currentState.m_flags ^ state.m_flags;
+				if (isCompute)
+				{
+					BX_CHECK(false, "Compute is not supported on DirectX 9.");
+					continue;
+				}
+
+				const RenderDraw& draw = _render->m_renderItem[_render->m_sortValues[item] ].draw;
+
+				const uint64_t newFlags = draw.m_flags;
+				uint64_t changedFlags = currentState.m_flags ^ draw.m_flags;
 				currentState.m_flags = newFlags;
 				currentState.m_flags = newFlags;
 
 
-				const uint64_t newStencil = state.m_stencil;
-				uint64_t changedStencil = currentState.m_stencil ^ state.m_stencil;
+				const uint64_t newStencil = draw.m_stencil;
+				uint64_t changedStencil = currentState.m_stencil ^ draw.m_stencil;
 				currentState.m_stencil = newStencil;
 				currentState.m_stencil = newStencil;
 
 
 				if (key.m_view != view)
 				if (key.m_view != view)
 				{
 				{
 					currentState.clear();
 					currentState.clear();
-					currentState.m_scissor = !state.m_scissor;
+					currentState.m_scissor = !draw.m_scissor;
 					changedFlags = BGFX_STATE_MASK;
 					changedFlags = BGFX_STATE_MASK;
 					changedStencil = packStencil(BGFX_STENCIL_MASK, BGFX_STENCIL_MASK);
 					changedStencil = packStencil(BGFX_STENCIL_MASK, BGFX_STENCIL_MASK);
 					currentState.m_flags = newFlags;
 					currentState.m_flags = newFlags;
@@ -2590,7 +2600,7 @@ namespace bgfx
 					DX_CHECK(device->SetRenderState(D3DRS_ALPHAFUNC, D3DCMP_GREATER) );
 					DX_CHECK(device->SetRenderState(D3DRS_ALPHAFUNC, D3DCMP_GREATER) );
 				}
 				}
 
 
-				uint16_t scissor = state.m_scissor;
+				uint16_t scissor = draw.m_scissor;
 				if (currentState.m_scissor != scissor)
 				if (currentState.m_scissor != scissor)
 				{
 				{
 					currentState.m_scissor = scissor;
 					currentState.m_scissor = scissor;
@@ -2737,7 +2747,7 @@ namespace bgfx
 					}
 					}
 
 
 					if ( (BGFX_STATE_BLEND_MASK|BGFX_STATE_BLEND_EQUATION_MASK) & changedFlags
 					if ( (BGFX_STATE_BLEND_MASK|BGFX_STATE_BLEND_EQUATION_MASK) & changedFlags
-					||  blendFactor != state.m_rgba)
+					||  blendFactor != draw.m_rgba)
 					{
 					{
 						bool enabled = !!(BGFX_STATE_BLEND_MASK & newFlags);
 						bool enabled = !!(BGFX_STATE_BLEND_MASK & newFlags);
 						DX_CHECK(device->SetRenderState(D3DRS_ALPHABLENDENABLE, enabled) );
 						DX_CHECK(device->SetRenderState(D3DRS_ALPHABLENDENABLE, enabled) );
@@ -2770,9 +2780,9 @@ namespace bgfx
 							}
 							}
 
 
 							if ( (s_blendFactor[srcRGB].m_factor || s_blendFactor[dstRGB].m_factor)
 							if ( (s_blendFactor[srcRGB].m_factor || s_blendFactor[dstRGB].m_factor)
-							&&  blendFactor != state.m_rgba)
+							&&  blendFactor != draw.m_rgba)
 							{
 							{
-								const uint32_t rgba = state.m_rgba;
+								const uint32_t rgba = draw.m_rgba;
 								D3DCOLOR color = D3DCOLOR_RGBA(rgba>>24
 								D3DCOLOR color = D3DCOLOR_RGBA(rgba>>24
 															, (rgba>>16)&0xff
 															, (rgba>>16)&0xff
 															, (rgba>> 8)&0xff
 															, (rgba>> 8)&0xff
@@ -2782,7 +2792,7 @@ namespace bgfx
 							}
 							}
 						}
 						}
 
 
-						blendFactor = state.m_rgba;
+						blendFactor = draw.m_rgba;
 					}
 					}
 
 
 					const uint64_t pt = _render->m_debug&BGFX_DEBUG_WIREFRAME ? BGFX_STATE_PT_LINES : newFlags&BGFX_STATE_PT_MASK;
 					const uint64_t pt = _render->m_debug&BGFX_DEBUG_WIREFRAME ? BGFX_STATE_PT_LINES : newFlags&BGFX_STATE_PT_MASK;
@@ -2791,8 +2801,8 @@ namespace bgfx
 				}
 				}
 
 
 				bool programChanged = false;
 				bool programChanged = false;
-				bool constantsChanged = state.m_constBegin < state.m_constEnd;
-				rendererUpdateUniforms(this, _render->m_constantBuffer, state.m_constBegin, state.m_constEnd);
+				bool constantsChanged = draw.m_constBegin < draw.m_constEnd;
+				rendererUpdateUniforms(this, _render->m_constantBuffer, draw.m_constBegin, draw.m_constEnd);
 
 
 				if (key.m_program != programIdx)
 				if (key.m_program != programIdx)
 				{
 				{
@@ -2806,8 +2816,8 @@ namespace bgfx
 					else
 					else
 					{
 					{
 						ProgramD3D9& program = m_program[programIdx];
 						ProgramD3D9& program = m_program[programIdx];
-						device->SetVertexShader( (IDirect3DVertexShader9*)program.m_vsh->m_ptr);
-						device->SetPixelShader( (IDirect3DPixelShader9*)program.m_fsh->m_ptr);
+						device->SetVertexShader(program.m_vsh->m_vertexShader);
+						device->SetPixelShader(program.m_fsh->m_pixelShader);
 					}
 					}
 
 
 					programChanged = 
 					programChanged = 
@@ -2917,15 +2927,15 @@ namespace bgfx
 
 
 						case PredefinedUniform::Model:
 						case PredefinedUniform::Model:
 							{
 							{
- 								const Matrix4& model = _render->m_matrixCache.m_cache[state.m_matrix];
-								setShaderConstantF(flags, predefined.m_loc, model.un.val, bx::uint32_min(state.m_num*4, predefined.m_count) );
+ 								const Matrix4& model = _render->m_matrixCache.m_cache[draw.m_matrix];
+								setShaderConstantF(flags, predefined.m_loc, model.un.val, bx::uint32_min(draw.m_num*4, predefined.m_count) );
 							}
 							}
 							break;
 							break;
 
 
 						case PredefinedUniform::ModelView:
 						case PredefinedUniform::ModelView:
 							{
 							{
 								Matrix4 modelView;
 								Matrix4 modelView;
-								const Matrix4& model = _render->m_matrixCache.m_cache[state.m_matrix];
+								const Matrix4& model = _render->m_matrixCache.m_cache[draw.m_matrix];
 								bx::float4x4_mul(&modelView.un.f4x4, &model.un.f4x4, &_render->m_view[view].un.f4x4);
 								bx::float4x4_mul(&modelView.un.f4x4, &model.un.f4x4, &_render->m_view[view].un.f4x4);
 								setShaderConstantF(flags, predefined.m_loc, modelView.un.val, bx::uint32_min(4, predefined.m_count) );
 								setShaderConstantF(flags, predefined.m_loc, modelView.un.val, bx::uint32_min(4, predefined.m_count) );
 							}
 							}
@@ -2934,7 +2944,7 @@ namespace bgfx
 						case PredefinedUniform::ModelViewProj:
 						case PredefinedUniform::ModelViewProj:
 							{
 							{
 								Matrix4 modelViewProj;
 								Matrix4 modelViewProj;
-								const Matrix4& model = _render->m_matrixCache.m_cache[state.m_matrix];
+								const Matrix4& model = _render->m_matrixCache.m_cache[draw.m_matrix];
 								bx::float4x4_mul(&modelViewProj.un.f4x4, &model.un.f4x4, &viewProj[view].un.f4x4);
 								bx::float4x4_mul(&modelViewProj.un.f4x4, &model.un.f4x4, &viewProj[view].un.f4x4);
 								setShaderConstantF(flags, predefined.m_loc, modelViewProj.un.val, bx::uint32_min(4, predefined.m_count) );
 								setShaderConstantF(flags, predefined.m_loc, modelViewProj.un.val, bx::uint32_min(4, predefined.m_count) );
 							}
 							}
@@ -2958,7 +2968,7 @@ namespace bgfx
 					uint64_t flag = BGFX_STATE_TEX0;
 					uint64_t flag = BGFX_STATE_TEX0;
 					for (uint32_t stage = 0; stage < BGFX_STATE_TEX_COUNT; ++stage)
 					for (uint32_t stage = 0; stage < BGFX_STATE_TEX_COUNT; ++stage)
 					{
 					{
-						const Sampler& sampler = state.m_sampler[stage];
+						const Sampler& sampler = draw.m_sampler[stage];
 						Sampler& current = currentState.m_sampler[stage];
 						Sampler& current = currentState.m_sampler[stage];
 						if (current.m_idx != sampler.m_idx
 						if (current.m_idx != sampler.m_idx
 						||  current.m_flags != sampler.m_flags
 						||  current.m_flags != sampler.m_flags
@@ -2980,34 +2990,34 @@ namespace bgfx
 				}
 				}
 
 
 				if (programChanged
 				if (programChanged
-				||  currentState.m_vertexBuffer.idx != state.m_vertexBuffer.idx
-				||  currentState.m_instanceDataBuffer.idx != state.m_instanceDataBuffer.idx
-				||  currentState.m_instanceDataOffset != state.m_instanceDataOffset
-				||  currentState.m_instanceDataStride != state.m_instanceDataStride)
+				||  currentState.m_vertexBuffer.idx != draw.m_vertexBuffer.idx
+				||  currentState.m_instanceDataBuffer.idx != draw.m_instanceDataBuffer.idx
+				||  currentState.m_instanceDataOffset != draw.m_instanceDataOffset
+				||  currentState.m_instanceDataStride != draw.m_instanceDataStride)
 				{
 				{
-					currentState.m_vertexBuffer = state.m_vertexBuffer;
-					currentState.m_instanceDataBuffer.idx = state.m_instanceDataBuffer.idx;
-					currentState.m_instanceDataOffset = state.m_instanceDataOffset;
-					currentState.m_instanceDataStride = state.m_instanceDataStride;
+					currentState.m_vertexBuffer = draw.m_vertexBuffer;
+					currentState.m_instanceDataBuffer.idx = draw.m_instanceDataBuffer.idx;
+					currentState.m_instanceDataOffset = draw.m_instanceDataOffset;
+					currentState.m_instanceDataStride = draw.m_instanceDataStride;
 
 
-					uint16_t handle = state.m_vertexBuffer.idx;
+					uint16_t handle = draw.m_vertexBuffer.idx;
 					if (invalidHandle != handle)
 					if (invalidHandle != handle)
 					{
 					{
 						const VertexBufferD3D9& vb = m_vertexBuffers[handle];
 						const VertexBufferD3D9& vb = m_vertexBuffers[handle];
 
 
-						uint16_t decl = !isValid(vb.m_decl) ? state.m_vertexDecl.idx : vb.m_decl.idx;
+						uint16_t decl = !isValid(vb.m_decl) ? draw.m_vertexDecl.idx : vb.m_decl.idx;
 						const VertexDeclaration& vertexDecl = m_vertexDecls[decl];
 						const VertexDeclaration& vertexDecl = m_vertexDecls[decl];
 						DX_CHECK(device->SetStreamSource(0, vb.m_ptr, 0, vertexDecl.m_decl.m_stride) );
 						DX_CHECK(device->SetStreamSource(0, vb.m_ptr, 0, vertexDecl.m_decl.m_stride) );
 
 
-						if (isValid(state.m_instanceDataBuffer)
+						if (isValid(draw.m_instanceDataBuffer)
 						&&  m_instancing)
 						&&  m_instancing)
 						{
 						{
-							const VertexBufferD3D9& inst = m_vertexBuffers[state.m_instanceDataBuffer.idx];
-							DX_CHECK(device->SetStreamSourceFreq(0, D3DSTREAMSOURCE_INDEXEDDATA|state.m_numInstances) );
+							const VertexBufferD3D9& inst = m_vertexBuffers[draw.m_instanceDataBuffer.idx];
+							DX_CHECK(device->SetStreamSourceFreq(0, D3DSTREAMSOURCE_INDEXEDDATA|draw.m_numInstances) );
 							DX_CHECK(device->SetStreamSourceFreq(1, UINT(D3DSTREAMSOURCE_INSTANCEDATA|1) ) );
 							DX_CHECK(device->SetStreamSourceFreq(1, UINT(D3DSTREAMSOURCE_INSTANCEDATA|1) ) );
-							DX_CHECK(device->SetStreamSource(1, inst.m_ptr, state.m_instanceDataOffset, state.m_instanceDataStride) );
+							DX_CHECK(device->SetStreamSource(1, inst.m_ptr, draw.m_instanceDataOffset, draw.m_instanceDataStride) );
 
 
-							IDirect3DVertexDeclaration9* ptr = createVertexDeclaration(vertexDecl.m_decl, state.m_instanceDataStride/16);
+							IDirect3DVertexDeclaration9* ptr = createVertexDeclaration(vertexDecl.m_decl, draw.m_instanceDataStride/16);
 							DX_CHECK(device->SetVertexDeclaration(ptr) );
 							DX_CHECK(device->SetVertexDeclaration(ptr) );
 							DX_RELEASE(ptr, 0);
 							DX_RELEASE(ptr, 0);
 						}
 						}
@@ -3025,11 +3035,11 @@ namespace bgfx
 					}
 					}
 				}
 				}
 
 
-				if (currentState.m_indexBuffer.idx != state.m_indexBuffer.idx)
+				if (currentState.m_indexBuffer.idx != draw.m_indexBuffer.idx)
 				{
 				{
-					currentState.m_indexBuffer = state.m_indexBuffer;
+					currentState.m_indexBuffer = draw.m_indexBuffer;
 
 
-					uint16_t handle = state.m_indexBuffer.idx;
+					uint16_t handle = draw.m_indexBuffer.idx;
 					if (invalidHandle != handle)
 					if (invalidHandle != handle)
 					{
 					{
 						const IndexBufferD3D9& ib = m_indexBuffers[handle];
 						const IndexBufferD3D9& ib = m_indexBuffers[handle];
@@ -3043,11 +3053,11 @@ namespace bgfx
 
 
 				if (isValid(currentState.m_vertexBuffer) )
 				if (isValid(currentState.m_vertexBuffer) )
 				{
 				{
-					uint32_t numVertices = state.m_numVertices;
+					uint32_t numVertices = draw.m_numVertices;
 					if (UINT32_MAX == numVertices)
 					if (UINT32_MAX == numVertices)
 					{
 					{
 						const VertexBufferD3D9& vb = m_vertexBuffers[currentState.m_vertexBuffer.idx];
 						const VertexBufferD3D9& vb = m_vertexBuffers[currentState.m_vertexBuffer.idx];
-						uint16_t decl = !isValid(vb.m_decl) ? state.m_vertexDecl.idx : vb.m_decl.idx;
+						uint16_t decl = !isValid(vb.m_decl) ? draw.m_vertexDecl.idx : vb.m_decl.idx;
 						const VertexDeclaration& vertexDecl = m_vertexDecls[decl];
 						const VertexDeclaration& vertexDecl = m_vertexDecls[decl];
 						numVertices = vb.m_size/vertexDecl.m_decl.m_stride;
 						numVertices = vb.m_size/vertexDecl.m_decl.m_stride;
 					}
 					}
@@ -3057,35 +3067,35 @@ namespace bgfx
 					uint32_t numInstances = 0;
 					uint32_t numInstances = 0;
 					uint32_t numPrimsRendered = 0;
 					uint32_t numPrimsRendered = 0;
 
 
-					if (isValid(state.m_indexBuffer) )
+					if (isValid(draw.m_indexBuffer) )
 					{
 					{
-						if (UINT32_MAX == state.m_numIndices)
+						if (UINT32_MAX == draw.m_numIndices)
 						{
 						{
-							numIndices = m_indexBuffers[state.m_indexBuffer.idx].m_size/2;
+							numIndices = m_indexBuffers[draw.m_indexBuffer.idx].m_size/2;
 							numPrimsSubmitted = numIndices/prim.m_div - prim.m_sub;
 							numPrimsSubmitted = numIndices/prim.m_div - prim.m_sub;
-							numInstances = state.m_numInstances;
-							numPrimsRendered = numPrimsSubmitted*state.m_numInstances;
+							numInstances = draw.m_numInstances;
+							numPrimsRendered = numPrimsSubmitted*draw.m_numInstances;
 
 
 							DX_CHECK(device->DrawIndexedPrimitive(prim.m_type
 							DX_CHECK(device->DrawIndexedPrimitive(prim.m_type
-								, state.m_startVertex
+								, draw.m_startVertex
 								, 0
 								, 0
 								, numVertices
 								, numVertices
 								, 0
 								, 0
 								, numPrimsSubmitted
 								, numPrimsSubmitted
 								) );
 								) );
 						}
 						}
-						else if (prim.m_min <= state.m_numIndices)
+						else if (prim.m_min <= draw.m_numIndices)
 						{
 						{
-							numIndices = state.m_numIndices;
+							numIndices = draw.m_numIndices;
 							numPrimsSubmitted = numIndices/prim.m_div - prim.m_sub;
 							numPrimsSubmitted = numIndices/prim.m_div - prim.m_sub;
-							numInstances = state.m_numInstances;
-							numPrimsRendered = numPrimsSubmitted*state.m_numInstances;
+							numInstances = draw.m_numInstances;
+							numPrimsRendered = numPrimsSubmitted*draw.m_numInstances;
 
 
 							DX_CHECK(device->DrawIndexedPrimitive(prim.m_type
 							DX_CHECK(device->DrawIndexedPrimitive(prim.m_type
-								, state.m_startVertex
+								, draw.m_startVertex
 								, 0
 								, 0
 								, numVertices
 								, numVertices
-								, state.m_startIndex
+								, draw.m_startIndex
 								, numPrimsSubmitted
 								, numPrimsSubmitted
 								) );
 								) );
 						}
 						}
@@ -3093,11 +3103,11 @@ namespace bgfx
 					else
 					else
 					{
 					{
 						numPrimsSubmitted = numVertices/prim.m_div - prim.m_sub;
 						numPrimsSubmitted = numVertices/prim.m_div - prim.m_sub;
-						numInstances = state.m_numInstances;
-						numPrimsRendered = numPrimsSubmitted*state.m_numInstances;
+						numInstances = draw.m_numInstances;
+						numPrimsRendered = numPrimsSubmitted*draw.m_numInstances;
 
 
 						DX_CHECK(device->DrawPrimitive(prim.m_type
 						DX_CHECK(device->DrawPrimitive(prim.m_type
-							, state.m_startVertex
+							, draw.m_startVertex
 							, numPrimsSubmitted
 							, numPrimsSubmitted
 							) );
 							) );
 					}
 					}

+ 19 - 8
src/renderer_d3d9.h

@@ -218,9 +218,10 @@ namespace bgfx
 	struct ShaderD3D9
 	struct ShaderD3D9
 	{
 	{
 		ShaderD3D9()
 		ShaderD3D9()
-			: m_ptr(NULL)
+			: m_vertexShader(NULL)
 			, m_constantBuffer(NULL)
 			, m_constantBuffer(NULL)
 			, m_numPredefined(0)
 			, m_numPredefined(0)
+			, m_type(0)
 		{
 		{
 		}
 		}
 
 
@@ -236,23 +237,33 @@ namespace bgfx
 			}
 			}
 			m_numPredefined = 0;
 			m_numPredefined = 0;
 
 
-			DX_RELEASE(m_ptr, 0);
+			switch (m_type)
+			{
+			case 0:  DX_RELEASE(m_vertexShader, 0);
+			default: DX_RELEASE(m_pixelShader,  0);
+			}
 		}
 		}
 
 
-		IUnknown* m_ptr;
+		union
+		{
+			// X360 doesn't have interface inheritance (can't use IUnknown*).
+			IDirect3DVertexShader9* m_vertexShader;
+			IDirect3DPixelShader9*  m_pixelShader;
+		};
 		ConstantBuffer* m_constantBuffer;
 		ConstantBuffer* m_constantBuffer;
 		PredefinedUniform m_predefined[PredefinedUniform::Count];
 		PredefinedUniform m_predefined[PredefinedUniform::Count];
 		uint8_t m_numPredefined;
 		uint8_t m_numPredefined;
+		uint8_t m_type;
 	};
 	};
 
 
 	struct ProgramD3D9
 	struct ProgramD3D9
 	{
 	{
 		void create(const ShaderD3D9& _vsh, const ShaderD3D9& _fsh)
 		void create(const ShaderD3D9& _vsh, const ShaderD3D9& _fsh)
 		{
 		{
-			BX_CHECK(NULL != _vsh.m_ptr, "Vertex shader doesn't exist.");
+			BX_CHECK(NULL != _vsh.m_vertexShader, "Vertex shader doesn't exist.");
 			m_vsh = &_vsh;
 			m_vsh = &_vsh;
 
 
-			BX_CHECK(NULL != _fsh.m_ptr, "Fragment shader doesn't exist.");
+			BX_CHECK(NULL != _fsh.m_pixelShader, "Fragment shader doesn't exist.");
 			m_fsh = &_fsh;
 			m_fsh = &_fsh;
 
 
 			memcpy(&m_predefined[0], _vsh.m_predefined, _vsh.m_numPredefined*sizeof(PredefinedUniform) );
 			memcpy(&m_predefined[0], _vsh.m_predefined, _vsh.m_numPredefined*sizeof(PredefinedUniform) );
@@ -318,10 +329,10 @@ namespace bgfx
 	
 	
 		union
 		union
 		{
 		{
-			IDirect3DBaseTexture9* m_ptr;
-			IDirect3DTexture9* m_texture2d;
+			IDirect3DBaseTexture9*   m_ptr;
+			IDirect3DTexture9*       m_texture2d;
 			IDirect3DVolumeTexture9* m_texture3d;
 			IDirect3DVolumeTexture9* m_texture3d;
-			IDirect3DCubeTexture9* m_textureCube;
+			IDirect3DCubeTexture9*   m_textureCube;
 		};
 		};
 
 
 		IDirect3DSurface9* m_surface;
 		IDirect3DSurface9* m_surface;

+ 376 - 126
src/renderer_gl.cpp

@@ -66,6 +66,13 @@ namespace bgfx
 		"i_data4",
 		"i_data4",
 	};
 	};
 
 
+	static const GLenum s_access[Access::Count] = // GL access qualifier per Access::Enum value; entry order must match the enum.
+	{
+		GL_READ_ONLY,  // Access::Read
+		GL_WRITE_ONLY, // Access::Write
+		GL_READ_WRITE, // Access::ReadWrite
+	};
+
 	static const GLenum s_attribType[AttribType::Count] =
 	static const GLenum s_attribType[AttribType::Count] =
 	{
 	{
 		GL_UNSIGNED_BYTE,
 		GL_UNSIGNED_BYTE,
@@ -169,7 +176,7 @@ namespace bgfx
 		bool m_supported;
 		bool m_supported;
 	};
 	};
 
 
-	static TextureFormatInfo s_textureFormat[TextureFormat::Count] =
+	static TextureFormatInfo s_textureFormat[] =
 	{
 	{
 		{ GL_COMPRESSED_RGBA_S3TC_DXT1_EXT,            GL_COMPRESSED_RGBA_S3TC_DXT1_EXT,            GL_ZERO,                        false }, // BC1
 		{ GL_COMPRESSED_RGBA_S3TC_DXT1_EXT,            GL_COMPRESSED_RGBA_S3TC_DXT1_EXT,            GL_ZERO,                        false }, // BC1
 		{ GL_COMPRESSED_RGBA_S3TC_DXT3_EXT,            GL_COMPRESSED_RGBA_S3TC_DXT3_EXT,            GL_ZERO,                        false }, // BC2
 		{ GL_COMPRESSED_RGBA_S3TC_DXT3_EXT,            GL_COMPRESSED_RGBA_S3TC_DXT3_EXT,            GL_ZERO,                        false }, // BC2
@@ -207,6 +214,47 @@ namespace bgfx
 		{ GL_DEPTH_COMPONENT32F,                       GL_DEPTH_COMPONENT,                          GL_FLOAT,                       false }, // D32F
 		{ GL_DEPTH_COMPONENT32F,                       GL_DEPTH_COMPONENT,                          GL_FLOAT,                       false }, // D32F
 		{ GL_STENCIL_INDEX8,                           GL_DEPTH_STENCIL,                            GL_UNSIGNED_BYTE,               false }, // D0S8
 		{ GL_STENCIL_INDEX8,                           GL_DEPTH_STENCIL,                            GL_UNSIGNED_BYTE,               false }, // D0S8
 	};
 	};
+	BX_STATIC_ASSERT(TextureFormat::Count == BX_COUNTOF(s_textureFormat) );
+
+	// One GLenum per TextureFormat entry, listed in the same order as the
+	// per-line format comments (the static assert after this table checks the
+	// entry count against TextureFormat::Count).
+	// GL_ZERO is used for the compressed, unknown and depth/stencil entries.
+	// NOTE(review): presumably consumed when binding a texture as a shader
+	// image -- confirm against the setImage/dispatch path.
+	static GLenum s_imageFormat[] =
+	{
+		GL_ZERO,     // BC1
+		GL_ZERO,     // BC2
+		GL_ZERO,     // BC3
+		GL_ZERO,     // BC4
+		GL_ZERO,     // BC5
+		GL_ZERO,     // ETC1
+		GL_ZERO,     // ETC2
+		GL_ZERO,     // ETC2A
+		GL_ZERO,     // ETC2A1
+		GL_ZERO,     // PTC12
+		GL_ZERO,     // PTC14
+		GL_ZERO,     // PTC12A
+		GL_ZERO,     // PTC14A
+		GL_ZERO,     // PTC22
+		GL_ZERO,     // PTC24
+		GL_ZERO,     // Unknown
+		GL_R8,       // R8
+		GL_R16,      // R16
+		GL_R16F,     // R16F
+		GL_RGBA8,    // BGRA8
+		GL_RGBA16,   // RGBA16
+		GL_RGBA16F,  // RGBA16F (half-float; was GL_RGBA16, which is unsigned normalized)
+		GL_RGB565,   // R5G6B5
+		GL_RGBA4,    // RGBA4
+		GL_RGB5_A1,  // RGB5A1
+		GL_RGB10_A2, // RGB10A2
+		GL_ZERO,     // UnknownDepth
+		GL_ZERO,     // D16
+		GL_ZERO,     // D24
+		GL_ZERO,     // D24S8
+		GL_ZERO,     // D32
+		GL_ZERO,     // D16F
+		GL_ZERO,     // D24F
+		GL_ZERO,     // D32F
+		GL_ZERO,     // D0S8
+	};
+	BX_STATIC_ASSERT(TextureFormat::Count == BX_COUNTOF(s_imageFormat) );
 
 
 	struct Extension
 	struct Extension
 	{
 	{
@@ -224,6 +272,7 @@ namespace bgfx
 			APPLE_texture_format_BGRA8888,
 			APPLE_texture_format_BGRA8888,
 			APPLE_texture_max_level,
 			APPLE_texture_max_level,
 
 
+			ARB_compute_shader,
 			ARB_debug_label,
 			ARB_debug_label,
 			ARB_debug_output,
 			ARB_debug_output,
 			ARB_depth_clamp,
 			ARB_depth_clamp,
@@ -237,8 +286,11 @@ namespace bgfx
 			ARB_instanced_arrays,
 			ARB_instanced_arrays,
 			ARB_map_buffer_range,
 			ARB_map_buffer_range,
 			ARB_multisample,
 			ARB_multisample,
+			ARB_program_interface_query,
 			ARB_sampler_objects,
 			ARB_sampler_objects,
 			ARB_seamless_cube_map,
 			ARB_seamless_cube_map,
+			ARB_shader_image_load_store,
+			ARB_shader_storage_buffer_object,
 			ARB_shader_texture_lod,
 			ARB_shader_texture_lod,
 			ARB_texture_compression_rgtc,
 			ARB_texture_compression_rgtc,
 			ARB_texture_float,
 			ARB_texture_float,
@@ -269,6 +321,7 @@ namespace bgfx
 			EXT_framebuffer_sRGB,
 			EXT_framebuffer_sRGB,
 			EXT_occlusion_query_boolean,
 			EXT_occlusion_query_boolean,
 			EXT_read_format_bgra,
 			EXT_read_format_bgra,
+			EXT_shader_image_load_store,
 			EXT_shader_texture_lod,
 			EXT_shader_texture_lod,
 			EXT_shadow_samplers,
 			EXT_shadow_samplers,
 			EXT_texture_array,
 			EXT_texture_array,
@@ -297,6 +350,8 @@ namespace bgfx
 			IMG_texture_compression_pvrtc2,
 			IMG_texture_compression_pvrtc2,
 			IMG_texture_format_BGRA8888,
 			IMG_texture_format_BGRA8888,
 
 
+			INTEL_fragment_shader_ordering,
+
 			KHR_debug,
 			KHR_debug,
 
 
 			MOZ_WEBGL_compressed_texture_s3tc,
 			MOZ_WEBGL_compressed_texture_s3tc,
@@ -357,6 +412,7 @@ namespace bgfx
 		{ "APPLE_texture_format_BGRA8888",         false,                             true  },
 		{ "APPLE_texture_format_BGRA8888",         false,                             true  },
 		{ "APPLE_texture_max_level",               false,                             true  },
 		{ "APPLE_texture_max_level",               false,                             true  },
 
 
+		{ "ARB_compute_shader",                    BGFX_CONFIG_RENDERER_OPENGL >= 43, true  },
 		{ "ARB_debug_label",                       false,                             true  },
 		{ "ARB_debug_label",                       false,                             true  },
 		{ "ARB_debug_output",                      BGFX_CONFIG_RENDERER_OPENGL >= 43, true  },
 		{ "ARB_debug_output",                      BGFX_CONFIG_RENDERER_OPENGL >= 43, true  },
 		{ "ARB_depth_clamp",                       BGFX_CONFIG_RENDERER_OPENGL >= 32, true  },
 		{ "ARB_depth_clamp",                       BGFX_CONFIG_RENDERER_OPENGL >= 32, true  },
@@ -370,8 +426,11 @@ namespace bgfx
 		{ "ARB_instanced_arrays",                  BGFX_CONFIG_RENDERER_OPENGL >= 33, true  },
 		{ "ARB_instanced_arrays",                  BGFX_CONFIG_RENDERER_OPENGL >= 33, true  },
 		{ "ARB_map_buffer_range",                  BGFX_CONFIG_RENDERER_OPENGL >= 30, true  },
 		{ "ARB_map_buffer_range",                  BGFX_CONFIG_RENDERER_OPENGL >= 30, true  },
 		{ "ARB_multisample",                       false,                             true  },
 		{ "ARB_multisample",                       false,                             true  },
+		{ "ARB_program_interface_query",           BGFX_CONFIG_RENDERER_OPENGL >= 43, true  },
 		{ "ARB_sampler_objects",                   BGFX_CONFIG_RENDERER_OPENGL >= 33, true  },
 		{ "ARB_sampler_objects",                   BGFX_CONFIG_RENDERER_OPENGL >= 33, true  },
 		{ "ARB_seamless_cube_map",                 BGFX_CONFIG_RENDERER_OPENGL >= 32, true  },
 		{ "ARB_seamless_cube_map",                 BGFX_CONFIG_RENDERER_OPENGL >= 32, true  },
+		{ "ARB_shader_image_load_store",           BGFX_CONFIG_RENDERER_OPENGL >= 42, true  },
+		{ "ARB_shader_storage_buffer_object",      BGFX_CONFIG_RENDERER_OPENGL >= 43, true  },
 		{ "ARB_shader_texture_lod",                BGFX_CONFIG_RENDERER_OPENGL >= 30, true  },
 		{ "ARB_shader_texture_lod",                BGFX_CONFIG_RENDERER_OPENGL >= 30, true  },
 		{ "ARB_texture_compression_rgtc",          BGFX_CONFIG_RENDERER_OPENGL >= 30, true  },
 		{ "ARB_texture_compression_rgtc",          BGFX_CONFIG_RENDERER_OPENGL >= 30, true  },
 		{ "ARB_texture_float",                     BGFX_CONFIG_RENDERER_OPENGL >= 30, true  },
 		{ "ARB_texture_float",                     BGFX_CONFIG_RENDERER_OPENGL >= 30, true  },
@@ -402,6 +461,7 @@ namespace bgfx
 		{ "EXT_framebuffer_sRGB",                  BGFX_CONFIG_RENDERER_OPENGL >= 30, true  },
 		{ "EXT_framebuffer_sRGB",                  BGFX_CONFIG_RENDERER_OPENGL >= 30, true  },
 		{ "EXT_occlusion_query_boolean",           false,                             true  },
 		{ "EXT_occlusion_query_boolean",           false,                             true  },
 		{ "EXT_read_format_bgra",                  false,                             true  },
 		{ "EXT_read_format_bgra",                  false,                             true  },
+		{ "EXT_shader_image_load_store",           false,                             true  },
 		{ "EXT_shader_texture_lod",                false,                             true  }, // GLES2 extension.
 		{ "EXT_shader_texture_lod",                false,                             true  }, // GLES2 extension.
 		{ "EXT_shadow_samplers",                   false,                             true  },
 		{ "EXT_shadow_samplers",                   false,                             true  },
 		{ "EXT_texture_array",                     BGFX_CONFIG_RENDERER_OPENGL >= 30, true  },
 		{ "EXT_texture_array",                     BGFX_CONFIG_RENDERER_OPENGL >= 30, true  },
@@ -430,6 +490,8 @@ namespace bgfx
 		{ "IMG_texture_compression_pvrtc2",        false,                             true  },
 		{ "IMG_texture_compression_pvrtc2",        false,                             true  },
 		{ "IMG_texture_format_BGRA8888",           false,                             true  },
 		{ "IMG_texture_format_BGRA8888",           false,                             true  },
 
 
+		{ "INTEL_fragment_shader_ordering",        false,                             true  },
+
 		{ "KHR_debug",                             BGFX_CONFIG_RENDERER_OPENGL >= 43, true  },
 		{ "KHR_debug",                             BGFX_CONFIG_RENDERER_OPENGL >= 43, true  },
 
 
 		{ "MOZ_WEBGL_compressed_texture_s3tc",     false,                             true  },
 		{ "MOZ_WEBGL_compressed_texture_s3tc",     false,                             true  },
@@ -1127,6 +1189,12 @@ namespace bgfx
 				: 0
 				: 0
 				;
 				;
 
 
+			g_caps.supported |= !!(BGFX_CONFIG_RENDERER_OPENGLES >= 31)
+				|| s_extension[Extension::ARB_compute_shader].m_supported
+				? BGFX_CAPS_COMPUTE
+				: 0
+				;
+
 			if (s_extension[Extension::EXT_texture_filter_anisotropic].m_supported)
 			if (s_extension[Extension::EXT_texture_filter_anisotropic].m_supported)
 			{
 			{
 				GL_CHECK(glGetFloatv(GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &m_maxAnisotropy) );
 				GL_CHECK(glGetFloatv(GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &m_maxAnisotropy) );
@@ -2139,6 +2207,14 @@ namespace bgfx
 
 
 		switch (_type)
 		switch (_type)
 		{
 		{
+			GLSL_TYPE(GL_INT);
+			GLSL_TYPE(GL_INT_VEC2);
+			GLSL_TYPE(GL_INT_VEC3);
+			GLSL_TYPE(GL_INT_VEC4);		
+			GLSL_TYPE(GL_UNSIGNED_INT);
+			GLSL_TYPE(GL_UNSIGNED_INT_VEC2);
+			GLSL_TYPE(GL_UNSIGNED_INT_VEC3);
+			GLSL_TYPE(GL_UNSIGNED_INT_VEC4);
 			GLSL_TYPE(GL_FLOAT);
 			GLSL_TYPE(GL_FLOAT);
 			GLSL_TYPE(GL_FLOAT_VEC2);
 			GLSL_TYPE(GL_FLOAT_VEC2);
 			GLSL_TYPE(GL_FLOAT_VEC3);
 			GLSL_TYPE(GL_FLOAT_VEC3);
@@ -2158,10 +2234,15 @@ namespace bgfx
 			GLSL_TYPE(GL_SAMPLER_CUBE);
 			GLSL_TYPE(GL_SAMPLER_CUBE);
 // 			GLSL_TYPE(GL_SAMPLER_1D_SHADOW);
 // 			GLSL_TYPE(GL_SAMPLER_1D_SHADOW);
 			GLSL_TYPE(GL_SAMPLER_2D_SHADOW);
 			GLSL_TYPE(GL_SAMPLER_2D_SHADOW);
+			GLSL_TYPE(GL_IMAGE_1D);
+			GLSL_TYPE(GL_IMAGE_2D);
+			GLSL_TYPE(GL_IMAGE_3D);
+			GLSL_TYPE(GL_IMAGE_CUBE);
 		}
 		}
 
 
 #undef GLSL_TYPE
 #undef GLSL_TYPE
 
 
+		BX_CHECK(false, "Unknown GLSL type? %x", _type);
 		return "UNKNOWN GLSL TYPE!";
 		return "UNKNOWN GLSL TYPE!";
 	}
 	}
 
 
@@ -2188,6 +2269,7 @@ namespace bgfx
 
 
 #undef GLENUM
 #undef GLENUM
 
 
+		BX_WARN(false, "Unknown enum? %x", _enum);
 		return "<GLenum?>";
 		return "<GLenum?>";
 	}
 	}
 
 
@@ -2196,6 +2278,7 @@ namespace bgfx
 		switch (_type)
 		switch (_type)
 		{
 		{
 		case GL_INT:
 		case GL_INT:
+		case GL_UNSIGNED_INT:
 			return UniformType::Uniform1iv;
 			return UniformType::Uniform1iv;
 
 
 		case GL_FLOAT:
 		case GL_FLOAT:
@@ -2233,6 +2316,10 @@ namespace bgfx
 		case GL_SAMPLER_CUBE:
 		case GL_SAMPLER_CUBE:
 // 		case GL_SAMPLER_1D_SHADOW:
 // 		case GL_SAMPLER_1D_SHADOW:
  		case GL_SAMPLER_2D_SHADOW:
  		case GL_SAMPLER_2D_SHADOW:
+		case GL_IMAGE_1D:
+		case GL_IMAGE_2D:
+		case GL_IMAGE_3D:
+		case GL_IMAGE_CUBE:
 			return UniformType::Uniform1iv;
 			return UniformType::Uniform1iv;
 		};
 		};
 
 
@@ -2251,7 +2338,12 @@ namespace bgfx
 		if (!cached)
 		if (!cached)
 		{
 		{
 			GL_CHECK(glAttachShader(m_id, _vsh.m_id) );
 			GL_CHECK(glAttachShader(m_id, _vsh.m_id) );
-			GL_CHECK(glAttachShader(m_id, _fsh.m_id) );
+
+			if (0 != _fsh.m_id)
+			{
+				GL_CHECK(glAttachShader(m_id, _fsh.m_id) );
+			}
+
 			GL_CHECK(glLinkProgram(m_id) );
 			GL_CHECK(glLinkProgram(m_id) );
 
 
 			GLint linked = 0;
 			GLint linked = 0;
@@ -2277,7 +2369,11 @@ namespace bgfx
 			// Must be after init, otherwise init might fail to lookup shader
 			// Must be after init, otherwise init might fail to lookup shader
 			// info (NVIDIA Tegra 3 OpenGL ES 2.0 14.01003).
 			// info (NVIDIA Tegra 3 OpenGL ES 2.0 14.01003).
 			GL_CHECK(glDetachShader(m_id, _vsh.m_id) );
 			GL_CHECK(glDetachShader(m_id, _vsh.m_id) );
-			GL_CHECK(glDetachShader(m_id, _fsh.m_id) );
+
+			if (0 != _fsh.m_id)
+			{
+				GL_CHECK(glDetachShader(m_id, _fsh.m_id) );
+			}
 		}
 		}
 	}
 	}
 
 
@@ -2302,19 +2398,30 @@ namespace bgfx
 
 
 	void ProgramGL::init()
 	void ProgramGL::init()
 	{
 	{
-		GLint activeAttribs;
-		GLint activeUniforms;
+		GLint activeAttribs  = 0;
+		GLint activeUniforms = 0;
+		GLint activeBuffers  = 0;
 
 
 #if BGFX_CONFIG_RENDERER_OPENGL >= 31
 #if BGFX_CONFIG_RENDERER_OPENGL >= 31
 		GL_CHECK(glBindFragDataLocation(m_id, 0, "bgfx_FragColor") );
 		GL_CHECK(glBindFragDataLocation(m_id, 0, "bgfx_FragColor") );
 #endif // BGFX_CONFIG_RENDERER_OPENGL >= 31
 #endif // BGFX_CONFIG_RENDERER_OPENGL >= 31
 
 
-		GL_CHECK(glGetProgramiv(m_id, GL_ACTIVE_ATTRIBUTES, &activeAttribs) );
-		GL_CHECK(glGetProgramiv(m_id, GL_ACTIVE_UNIFORMS, &activeUniforms) );
+		if (s_extension[Extension::ARB_program_interface_query].m_supported
+		||  BX_ENABLED(BGFX_CONFIG_RENDERER_OPENGLES >= 31) )
+		{
+			GL_CHECK(glGetProgramInterfaceiv(m_id, GL_PROGRAM_INPUT,   GL_ACTIVE_RESOURCES, &activeAttribs ) );
+			GL_CHECK(glGetProgramInterfaceiv(m_id, GL_UNIFORM,         GL_ACTIVE_RESOURCES, &activeUniforms) );
+			GL_CHECK(glGetProgramInterfaceiv(m_id, GL_BUFFER_VARIABLE, GL_ACTIVE_RESOURCES, &activeBuffers ) );
+		}
+		else
+		{
+			GL_CHECK(glGetProgramiv(m_id, GL_ACTIVE_ATTRIBUTES, &activeAttribs ) );
+			GL_CHECK(glGetProgramiv(m_id, GL_ACTIVE_UNIFORMS,   &activeUniforms) );
+		}
 
 
 		GLint max0, max1;
 		GLint max0, max1;
 		GL_CHECK(glGetProgramiv(m_id, GL_ACTIVE_ATTRIBUTE_MAX_LENGTH, &max0) );
 		GL_CHECK(glGetProgramiv(m_id, GL_ACTIVE_ATTRIBUTE_MAX_LENGTH, &max0) );
-		GL_CHECK(glGetProgramiv(m_id, GL_ACTIVE_UNIFORM_MAX_LENGTH, &max1) );
+		GL_CHECK(glGetProgramiv(m_id, GL_ACTIVE_UNIFORM_MAX_LENGTH,   &max1) );
 		uint32_t maxLength = bx::uint32_max(max0, max1);
 		uint32_t maxLength = bx::uint32_max(max0, max1);
 		char* name = (char*)alloca(maxLength + 1);
 		char* name = (char*)alloca(maxLength + 1);
 
 
@@ -2365,7 +2472,11 @@ namespace bgfx
 			case GL_SAMPLER_3D:
 			case GL_SAMPLER_3D:
 			case GL_SAMPLER_CUBE:
 			case GL_SAMPLER_CUBE:
 			case GL_SAMPLER_2D_SHADOW:
 			case GL_SAMPLER_2D_SHADOW:
-				BX_TRACE("Sampler %d at %d.", m_numSamplers, loc);
+			case GL_IMAGE_1D:
+			case GL_IMAGE_2D:
+			case GL_IMAGE_3D:
+			case GL_IMAGE_CUBE:
+				BX_TRACE("Sampler #%d at location %d.", m_numSamplers, loc);
 				m_sampler[m_numSamplers] = loc;
 				m_sampler[m_numSamplers] = loc;
 				m_numSamplers++;
 				m_numSamplers++;
 				break;
 				break;
@@ -2405,6 +2516,76 @@ namespace bgfx
 			BX_UNUSED(offset);
 			BX_UNUSED(offset);
 		}
 		}
 
 
+		if (s_extension[Extension::ARB_program_interface_query].m_supported
+		||  BX_ENABLED(BGFX_CONFIG_RENDERER_OPENGLES >= 31) )
+		{
+			struct VariableInfo
+			{
+				GLenum type;
+//				GLint  loc;
+			};
+			VariableInfo vi;
+			GLenum props[] = { GL_TYPE };
+
+#if 0
+			BX_TRACE("Buffers (%d):", activeUniforms);
+			for (int32_t ii = 0; ii < activeUniforms; ++ii)
+			{
+				GL_CHECK(glGetProgramResourceiv(m_id
+					, GL_UNIFORM
+					, ii
+					, BX_COUNTOF(props)
+					, props
+					, BX_COUNTOF(props)
+					, NULL
+					, (GLint*)&vi
+					) );
+
+				GL_CHECK(glGetProgramResourceName(m_id
+					, GL_UNIFORM
+					, ii
+					, maxLength + 1
+					, NULL
+					, name
+					) );
+
+				BX_TRACE("\tuniform %s %s is at location %d"
+					, glslTypeName(vi.type)
+					, name
+					, 0 //vi.loc
+					);
+			}
+#endif // 0
+
+			BX_TRACE("Buffers (%d):", activeBuffers);
+			for (int32_t ii = 0; ii < activeBuffers; ++ii)
+			{
+				GL_CHECK(glGetProgramResourceiv(m_id
+					, GL_BUFFER_VARIABLE
+					, ii
+					, BX_COUNTOF(props)
+					, props
+					, BX_COUNTOF(props)
+					, NULL
+					, (GLint*)&vi
+					) );
+
+				GL_CHECK(glGetProgramResourceName(m_id
+					, GL_BUFFER_VARIABLE
+					, ii
+					, maxLength + 1
+					, NULL
+					, name
+					) );
+
+				BX_TRACE("\t%s %s at %d"
+					, glslTypeName(vi.type)
+					, name
+					, 0 //vi.loc
+					);
+			}
+		}
+
 		m_constantBuffer->finish();
 		m_constantBuffer->finish();
 
 
 		memset(m_attributes, 0xff, sizeof(m_attributes) );
 		memset(m_attributes, 0xff, sizeof(m_attributes) );
@@ -3076,6 +3257,11 @@ namespace bgfx
 		uint16_t count;
 		uint16_t count;
 		bx::read(&reader, count);
 		bx::read(&reader, count);
 
 
+		BX_TRACE("%s Shader consts %d"
+			, BGFX_CHUNK_MAGIC_FSH == magic ? "Fragment" : BGFX_CHUNK_MAGIC_VSH == magic ? "Vertex" : "Compute"
+			, count
+			);
+
 		for (uint32_t ii = 0; ii < count; ++ii)
 		for (uint32_t ii = 0; ii < count; ++ii)
 		{
 		{
 			uint8_t nameSize;
 			uint8_t nameSize;
@@ -3585,8 +3771,8 @@ namespace bgfx
 
 
 		_render->sort();
 		_render->sort();
 
 
-		RenderState currentState;
-		currentState.reset();
+		RenderDraw currentState;
+		currentState.clear();
 		currentState.m_flags = BGFX_STATE_NONE;
 		currentState.m_flags = BGFX_STATE_NONE;
 		currentState.m_stencil = packStencil(BGFX_STENCIL_NONE, BGFX_STENCIL_NONE);
 		currentState.m_stencil = packStencil(BGFX_STENCIL_NONE, BGFX_STENCIL_NONE);
 
 
@@ -3622,6 +3808,9 @@ namespace bgfx
 		viewScissorRect.clear();
 		viewScissorRect.clear();
 
 
 		const bool blendIndependentSupported = s_extension[Extension::ARB_draw_buffers_blend].m_supported;
 		const bool blendIndependentSupported = s_extension[Extension::ARB_draw_buffers_blend].m_supported;
+		const bool computeSupported = (BX_ENABLED(BGFX_CONFIG_RENDERER_OPENGL) && s_extension[Extension::ARB_compute_shader].m_supported)
+									|| BX_ENABLED(BGFX_CONFIG_RENDERER_OPENGLES >= 31)
+									;
 
 
 		uint32_t statsNumPrimsSubmitted[BX_COUNTOF(s_primInfo)] = {};
 		uint32_t statsNumPrimsSubmitted[BX_COUNTOF(s_primInfo)] = {};
 		uint32_t statsNumPrimsRendered[BX_COUNTOF(s_primInfo)] = {};
 		uint32_t statsNumPrimsRendered[BX_COUNTOF(s_primInfo)] = {};
@@ -3634,26 +3823,13 @@ namespace bgfx
 
 
 			for (uint32_t item = 0, numItems = _render->m_num; item < numItems; ++item)
 			for (uint32_t item = 0, numItems = _render->m_num; item < numItems; ++item)
 			{
 			{
-				key.decode(_render->m_sortKeys[item]);
-				const RenderState& state = _render->m_renderState[_render->m_sortValues[item] ];
+				const bool isCompute   = key.decode(_render->m_sortKeys[item]);
+				const bool viewChanged = key.m_view != view;
 
 
-				const uint64_t newFlags = state.m_flags;
-				uint64_t changedFlags = currentState.m_flags ^ state.m_flags;
-				currentState.m_flags = newFlags;
+				const RenderItem& renderItem = _render->m_renderItem[_render->m_sortValues[item] ];
 
 
-				const uint64_t newStencil = state.m_stencil;
-				uint64_t changedStencil = currentState.m_stencil ^ state.m_stencil;
-				currentState.m_stencil = newStencil;
-
-				if (key.m_view != view)
+				if (viewChanged)
 				{
 				{
-					currentState.clear();
-					currentState.m_scissor = !state.m_scissor;
-					changedFlags = BGFX_STATE_MASK;
-					changedStencil = packStencil(BGFX_STENCIL_MASK, BGFX_STENCIL_MASK);
-					currentState.m_flags = newFlags;
-					currentState.m_stencil = newStencil;
-
 					GL_CHECK(glInsertEventMarker(0, s_viewName[key.m_view]) );
 					GL_CHECK(glInsertEventMarker(0, s_viewName[key.m_view]) );
 
 
 					view = key.m_view;
 					view = key.m_view;
@@ -3686,7 +3862,81 @@ namespace bgfx
 					GL_CHECK(glDisable(GL_BLEND) );
 					GL_CHECK(glDisable(GL_BLEND) );
 				}
 				}
 
 
-				uint16_t scissor = state.m_scissor;
+				if (isCompute)
+				{
+					if (computeSupported)
+					{
+						const RenderCompute& compute = renderItem.compute;
+
+						ProgramGL& program = m_program[key.m_program];
+ 						GL_CHECK(glUseProgram(program.m_id) );
+
+						GLbitfield barrier = 0;
+						for (uint32_t ii = 0; ii < BGFX_MAX_COMPUTE_BINDINGS; ++ii)
+						{
+							const ComputeBinding& bind = compute.m_bind[ii];
+							if (invalidHandle != bind.m_idx)
+							{
+								switch (bind.m_type)
+								{
+								case ComputeBinding::Image:
+									{
+										const TextureGL& texture = m_textures[bind.m_idx];
+										GL_CHECK(glBindImageTexture(ii, texture.m_id, bind.m_mip, GL_FALSE, 0, s_access[bind.m_access], s_imageFormat[bind.m_format]) );
+										barrier |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
+									}
+									break;
+
+								case ComputeBinding::Buffer:
+									{
+// 										const VertexBufferGL& vertexBuffer = m_vertexBuffers[bind.m_idx];
+// 										GL_CHECK(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, ii, vertexBuffer.m_id) ); 
+// 										barrier |= GL_SHADER_STORAGE_BARRIER_BIT;
+									}
+									break;
+								}
+							}
+						}
+
+						if (0 != barrier)
+						{
+							bool constantsChanged = compute.m_constBegin < compute.m_constEnd;
+							rendererUpdateUniforms(this, _render->m_constantBuffer, compute.m_constBegin, compute.m_constEnd);
+
+							if (constantsChanged)
+							{
+								commit(*program.m_constantBuffer);
+							}
+
+							GL_CHECK(glDispatchCompute(compute.m_numX, compute.m_numY, compute.m_numZ) );
+							GL_CHECK(glMemoryBarrier(barrier) );
+						}
+					}
+
+					continue;
+				}
+
+				const RenderDraw& draw = renderItem.draw;
+
+				const uint64_t newFlags = draw.m_flags;
+				uint64_t changedFlags = currentState.m_flags ^ draw.m_flags;
+				currentState.m_flags = newFlags;
+
+				const uint64_t newStencil = draw.m_stencil;
+				uint64_t changedStencil = currentState.m_stencil ^ draw.m_stencil;
+				currentState.m_stencil = newStencil;
+
+				if (viewChanged)
+				{
+					currentState.clear();
+					currentState.m_scissor = !draw.m_scissor;
+					changedFlags = BGFX_STATE_MASK;
+					changedStencil = packStencil(BGFX_STENCIL_MASK, BGFX_STENCIL_MASK);
+					currentState.m_flags = newFlags;
+					currentState.m_stencil = newStencil;
+				}
+
+				uint16_t scissor = draw.m_scissor;
 				if (currentState.m_scissor != scissor)
 				if (currentState.m_scissor != scissor)
 				{
 				{
 					currentState.m_scissor = scissor;
 					currentState.m_scissor = scissor;
@@ -3843,10 +4093,10 @@ namespace bgfx
 					}
 					}
 
 
 					if ( (BGFX_STATE_BLEND_MASK|BGFX_STATE_BLEND_EQUATION_MASK|BGFX_STATE_BLEND_INDEPENDENT) & changedFlags
 					if ( (BGFX_STATE_BLEND_MASK|BGFX_STATE_BLEND_EQUATION_MASK|BGFX_STATE_BLEND_INDEPENDENT) & changedFlags
-					||  blendFactor != state.m_rgba)
+					||  blendFactor != draw.m_rgba)
 					{
 					{
 						if ( (BGFX_STATE_BLEND_MASK|BGFX_STATE_BLEND_EQUATION_MASK|BGFX_STATE_BLEND_INDEPENDENT) & newFlags
 						if ( (BGFX_STATE_BLEND_MASK|BGFX_STATE_BLEND_EQUATION_MASK|BGFX_STATE_BLEND_INDEPENDENT) & newFlags
-						||  blendFactor != state.m_rgba)
+						||  blendFactor != draw.m_rgba)
 						{
 						{
 							const bool enabled = !!(BGFX_STATE_BLEND_MASK & newFlags);
 							const bool enabled = !!(BGFX_STATE_BLEND_MASK & newFlags);
 							const bool independent = !!(BGFX_STATE_BLEND_INDEPENDENT & newFlags)
 							const bool independent = !!(BGFX_STATE_BLEND_INDEPENDENT & newFlags)
@@ -3864,7 +4114,7 @@ namespace bgfx
 							const uint32_t equRGB = (equation   )&0x7;
 							const uint32_t equRGB = (equation   )&0x7;
 							const uint32_t equA   = (equation>>3)&0x7;
 							const uint32_t equA   = (equation>>3)&0x7;
 
 
-							const uint32_t numRt = s_renderGL->getNumRt();
+							const uint32_t numRt = getNumRt();
 
 
 							if (!BX_ENABLED(BGFX_CONFIG_RENDERER_OPENGL)
 							if (!BX_ENABLED(BGFX_CONFIG_RENDERER_OPENGL)
 							||  1 >= numRt
 							||  1 >= numRt
@@ -3881,9 +4131,9 @@ namespace bgfx
 									GL_CHECK(glBlendEquationSeparate(s_blendEquation[equRGB], s_blendEquation[equA]) );
 									GL_CHECK(glBlendEquationSeparate(s_blendEquation[equRGB], s_blendEquation[equA]) );
 
 
 									if ( (s_blendFactor[srcRGB].m_factor || s_blendFactor[dstRGB].m_factor)
 									if ( (s_blendFactor[srcRGB].m_factor || s_blendFactor[dstRGB].m_factor)
-									&&  blendFactor != state.m_rgba)
+									&&  blendFactor != draw.m_rgba)
 									{
 									{
-										const uint32_t rgba = state.m_rgba;
+										const uint32_t rgba = draw.m_rgba;
 										GLclampf rr = ( (rgba>>24)     )/255.0f;
 										GLclampf rr = ( (rgba>>24)     )/255.0f;
 										GLclampf gg = ( (rgba>>16)&0xff)/255.0f;
 										GLclampf gg = ( (rgba>>16)&0xff)/255.0f;
 										GLclampf bb = ( (rgba>> 8)&0xff)/255.0f;
 										GLclampf bb = ( (rgba>> 8)&0xff)/255.0f;
@@ -3918,7 +4168,7 @@ namespace bgfx
 									GL_CHECK(glDisablei(GL_BLEND, 0) );
 									GL_CHECK(glDisablei(GL_BLEND, 0) );
 								}
 								}
 
 
-								for (uint32_t ii = 1, rgba = state.m_rgba; ii < numRt; ++ii, rgba >>= 11)
+								for (uint32_t ii = 1, rgba = draw.m_rgba; ii < numRt; ++ii, rgba >>= 11)
 								{
 								{
 									if (0 != (rgba&0x7ff) )
 									if (0 != (rgba&0x7ff) )
 									{
 									{
@@ -3941,7 +4191,7 @@ namespace bgfx
 							GL_CHECK(glDisable(GL_BLEND) );
 							GL_CHECK(glDisable(GL_BLEND) );
 						}
 						}
 
 
-						blendFactor = state.m_rgba;
+						blendFactor = draw.m_rgba;
 					}
 					}
 
 
 					const uint64_t pt = _render->m_debug&BGFX_DEBUG_WIREFRAME ? BGFX_STATE_PT_LINES : newFlags&BGFX_STATE_PT_MASK;
 					const uint64_t pt = _render->m_debug&BGFX_DEBUG_WIREFRAME ? BGFX_STATE_PT_LINES : newFlags&BGFX_STATE_PT_MASK;
@@ -3950,14 +4200,14 @@ namespace bgfx
 				}
 				}
 
 
 				bool programChanged = false;
 				bool programChanged = false;
-				bool constantsChanged = state.m_constBegin < state.m_constEnd;
+				bool constantsChanged = draw.m_constBegin < draw.m_constEnd;
 				bool bindAttribs = false;
 				bool bindAttribs = false;
-				rendererUpdateUniforms(this, _render->m_constantBuffer, state.m_constBegin, state.m_constEnd);
+				rendererUpdateUniforms(this, _render->m_constantBuffer, draw.m_constBegin, draw.m_constEnd);
 
 
 				if (key.m_program != programIdx)
 				if (key.m_program != programIdx)
 				{
 				{
 					programIdx = key.m_program;
 					programIdx = key.m_program;
-					GLuint id = invalidHandle == programIdx ? 0 : s_renderGL->m_program[programIdx].m_id;
+					GLuint id = invalidHandle == programIdx ? 0 : m_program[programIdx].m_id;
 					GL_CHECK(glUseProgram(id) );
 					GL_CHECK(glUseProgram(id) );
 					programChanged =
 					programChanged =
 						constantsChanged =
 						constantsChanged =
@@ -3966,7 +4216,7 @@ namespace bgfx
 
 
 				if (invalidHandle != programIdx)
 				if (invalidHandle != programIdx)
 				{
 				{
-					ProgramGL& program = s_renderGL->m_program[programIdx];
+					ProgramGL& program = m_program[programIdx];
 
 
 					if (constantsChanged)
 					if (constantsChanged)
 					{
 					{
@@ -4086,9 +4336,9 @@ namespace bgfx
 
 
 						case PredefinedUniform::Model:
 						case PredefinedUniform::Model:
 							{
 							{
-								const Matrix4& model = _render->m_matrixCache.m_cache[state.m_matrix];
+								const Matrix4& model = _render->m_matrixCache.m_cache[draw.m_matrix];
 								GL_CHECK(glUniformMatrix4fv(predefined.m_loc
 								GL_CHECK(glUniformMatrix4fv(predefined.m_loc
-									, bx::uint32_min(predefined.m_count, state.m_num)
+									, bx::uint32_min(predefined.m_count, draw.m_num)
 									, GL_FALSE
 									, GL_FALSE
 									, model.un.val
 									, model.un.val
 									) );
 									) );
@@ -4098,7 +4348,7 @@ namespace bgfx
 						case PredefinedUniform::ModelView:
 						case PredefinedUniform::ModelView:
 							{
 							{
 								Matrix4 modelView;
 								Matrix4 modelView;
-								const Matrix4& model = _render->m_matrixCache.m_cache[state.m_matrix];
+								const Matrix4& model = _render->m_matrixCache.m_cache[draw.m_matrix];
 								bx::float4x4_mul(&modelView.un.f4x4, &model.un.f4x4, &_render->m_view[view].un.f4x4);
 								bx::float4x4_mul(&modelView.un.f4x4, &model.un.f4x4, &_render->m_view[view].un.f4x4);
 
 
 								GL_CHECK(glUniformMatrix4fv(predefined.m_loc
 								GL_CHECK(glUniformMatrix4fv(predefined.m_loc
@@ -4112,7 +4362,7 @@ namespace bgfx
 						case PredefinedUniform::ModelViewProj:
 						case PredefinedUniform::ModelViewProj:
 							{
 							{
 								Matrix4 modelViewProj;
 								Matrix4 modelViewProj;
-								const Matrix4& model = _render->m_matrixCache.m_cache[state.m_matrix];
+								const Matrix4& model = _render->m_matrixCache.m_cache[draw.m_matrix];
 								bx::float4x4_mul(&modelViewProj.un.f4x4, &model.un.f4x4, &viewProj[view].un.f4x4);
 								bx::float4x4_mul(&modelViewProj.un.f4x4, &model.un.f4x4, &viewProj[view].un.f4x4);
 
 
 								GL_CHECK(glUniformMatrix4fv(predefined.m_loc
 								GL_CHECK(glUniformMatrix4fv(predefined.m_loc
@@ -4139,7 +4389,7 @@ namespace bgfx
 						uint64_t flag = BGFX_STATE_TEX0;
 						uint64_t flag = BGFX_STATE_TEX0;
 						for (uint32_t stage = 0; stage < BGFX_STATE_TEX_COUNT; ++stage)
 						for (uint32_t stage = 0; stage < BGFX_STATE_TEX_COUNT; ++stage)
 						{
 						{
-							const Sampler& sampler = state.m_sampler[stage];
+							const Sampler& sampler = draw.m_sampler[stage];
 							Sampler& current = currentState.m_sampler[stage];
 							Sampler& current = currentState.m_sampler[stage];
 							if (current.m_idx != sampler.m_idx
 							if (current.m_idx != sampler.m_idx
 							||  current.m_flags != sampler.m_flags
 							||  current.m_flags != sampler.m_flags
@@ -4147,7 +4397,7 @@ namespace bgfx
 							{
 							{
 								if (invalidHandle != sampler.m_idx)
 								if (invalidHandle != sampler.m_idx)
 								{
 								{
-									TextureGL& texture = s_renderGL->m_textures[sampler.m_idx];
+									TextureGL& texture = m_textures[sampler.m_idx];
 									texture.commit(stage, sampler.m_flags);
 									texture.commit(stage, sampler.m_flags);
 								}
 								}
 							}
 							}
@@ -4158,33 +4408,33 @@ namespace bgfx
 					}
 					}
 
 
 					if (0 != defaultVao
 					if (0 != defaultVao
-					&&  0 == state.m_startVertex
-					&&  0 == state.m_instanceDataOffset)
+					&&  0 == draw.m_startVertex
+					&&  0 == draw.m_instanceDataOffset)
 					{
 					{
 						if (programChanged
 						if (programChanged
-						||  currentState.m_vertexBuffer.idx != state.m_vertexBuffer.idx
-						||  currentState.m_indexBuffer.idx != state.m_indexBuffer.idx
-						||  currentState.m_instanceDataBuffer.idx != state.m_instanceDataBuffer.idx
-						||  currentState.m_instanceDataOffset != state.m_instanceDataOffset
-						||  currentState.m_instanceDataStride != state.m_instanceDataStride)
+						||  currentState.m_vertexBuffer.idx != draw.m_vertexBuffer.idx
+						||  currentState.m_indexBuffer.idx != draw.m_indexBuffer.idx
+						||  currentState.m_instanceDataBuffer.idx != draw.m_instanceDataBuffer.idx
+						||  currentState.m_instanceDataOffset != draw.m_instanceDataOffset
+						||  currentState.m_instanceDataStride != draw.m_instanceDataStride)
 						{
 						{
 							bx::HashMurmur2A murmur;
 							bx::HashMurmur2A murmur;
 							murmur.begin();
 							murmur.begin();
-							murmur.add(state.m_vertexBuffer.idx);
-							murmur.add(state.m_indexBuffer.idx);
-							murmur.add(state.m_instanceDataBuffer.idx);
-							murmur.add(state.m_instanceDataOffset);
-							murmur.add(state.m_instanceDataStride);
+							murmur.add(draw.m_vertexBuffer.idx);
+							murmur.add(draw.m_indexBuffer.idx);
+							murmur.add(draw.m_instanceDataBuffer.idx);
+							murmur.add(draw.m_instanceDataOffset);
+							murmur.add(draw.m_instanceDataStride);
 							murmur.add(programIdx);
 							murmur.add(programIdx);
 							uint32_t hash = murmur.end();
 							uint32_t hash = murmur.end();
 
 
-							currentState.m_vertexBuffer = state.m_vertexBuffer;
-							currentState.m_indexBuffer = state.m_indexBuffer;
-							currentState.m_instanceDataOffset = state.m_instanceDataOffset;
-							currentState.m_instanceDataStride = state.m_instanceDataStride;
-							baseVertex = state.m_startVertex;
+							currentState.m_vertexBuffer = draw.m_vertexBuffer;
+							currentState.m_indexBuffer = draw.m_indexBuffer;
+							currentState.m_instanceDataOffset = draw.m_instanceDataOffset;
+							currentState.m_instanceDataStride = draw.m_instanceDataStride;
+							baseVertex = draw.m_startVertex;
 
 
-							GLuint id = s_renderGL->m_vaoStateCache.find(hash);
+							GLuint id = m_vaoStateCache.find(hash);
 							if (UINT32_MAX != id)
 							if (UINT32_MAX != id)
 							{
 							{
 								currentVao = id;
 								currentVao = id;
@@ -4192,28 +4442,28 @@ namespace bgfx
 							}
 							}
 							else
 							else
 							{
 							{
-								id = s_renderGL->m_vaoStateCache.add(hash);
+								id = m_vaoStateCache.add(hash);
 								currentVao = id;
 								currentVao = id;
 								GL_CHECK(glBindVertexArray(id) );
 								GL_CHECK(glBindVertexArray(id) );
 
 
-								ProgramGL& program = s_renderGL->m_program[programIdx];
+								ProgramGL& program = m_program[programIdx];
 								program.add(hash);
 								program.add(hash);
 
 
-								if (isValid(state.m_vertexBuffer) )
+								if (isValid(draw.m_vertexBuffer) )
 								{
 								{
-									VertexBufferGL& vb = s_renderGL->m_vertexBuffers[state.m_vertexBuffer.idx];
+									VertexBufferGL& vb = m_vertexBuffers[draw.m_vertexBuffer.idx];
 									vb.add(hash);
 									vb.add(hash);
 									GL_CHECK(glBindBuffer(GL_ARRAY_BUFFER, vb.m_id) );
 									GL_CHECK(glBindBuffer(GL_ARRAY_BUFFER, vb.m_id) );
 
 
-									uint16_t decl = !isValid(vb.m_decl) ? state.m_vertexDecl.idx : vb.m_decl.idx;
-									program.bindAttributes(s_renderGL->m_vertexDecls[decl], state.m_startVertex);
+									uint16_t decl = !isValid(vb.m_decl) ? draw.m_vertexDecl.idx : vb.m_decl.idx;
+									program.bindAttributes(m_vertexDecls[decl], draw.m_startVertex);
 
 
-									if (isValid(state.m_instanceDataBuffer) )
+									if (isValid(draw.m_instanceDataBuffer) )
 									{
 									{
-										VertexBufferGL& instanceVb = s_renderGL->m_vertexBuffers[state.m_instanceDataBuffer.idx];
+										VertexBufferGL& instanceVb = m_vertexBuffers[draw.m_instanceDataBuffer.idx];
 										instanceVb.add(hash);
 										instanceVb.add(hash);
 										GL_CHECK(glBindBuffer(GL_ARRAY_BUFFER, instanceVb.m_id) );
 										GL_CHECK(glBindBuffer(GL_ARRAY_BUFFER, instanceVb.m_id) );
-										program.bindInstanceData(state.m_instanceDataStride, state.m_instanceDataOffset);
+										program.bindInstanceData(draw.m_instanceDataStride, draw.m_instanceDataOffset);
 									}
 									}
 								}
 								}
 								else
 								else
@@ -4221,9 +4471,9 @@ namespace bgfx
 									GL_CHECK(glBindBuffer(GL_ARRAY_BUFFER, 0) );
 									GL_CHECK(glBindBuffer(GL_ARRAY_BUFFER, 0) );
 								}
 								}
 
 
-								if (isValid(state.m_indexBuffer) )
+								if (isValid(draw.m_indexBuffer) )
 								{
 								{
-									IndexBufferGL& ib = s_renderGL->m_indexBuffers[state.m_indexBuffer.idx];
+									IndexBufferGL& ib = m_indexBuffers[draw.m_indexBuffer.idx];
 									ib.add(hash);
 									ib.add(hash);
 									GL_CHECK(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib.m_id) );
 									GL_CHECK(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib.m_id) );
 								}
 								}
@@ -4247,20 +4497,20 @@ namespace bgfx
 						}
 						}
 
 
 						if (programChanged
 						if (programChanged
-						||  currentState.m_vertexBuffer.idx != state.m_vertexBuffer.idx
-						||  currentState.m_instanceDataBuffer.idx != state.m_instanceDataBuffer.idx
-						||  currentState.m_instanceDataOffset != state.m_instanceDataOffset
-						||  currentState.m_instanceDataStride != state.m_instanceDataStride)
+						||  currentState.m_vertexBuffer.idx != draw.m_vertexBuffer.idx
+						||  currentState.m_instanceDataBuffer.idx != draw.m_instanceDataBuffer.idx
+						||  currentState.m_instanceDataOffset != draw.m_instanceDataOffset
+						||  currentState.m_instanceDataStride != draw.m_instanceDataStride)
 						{
 						{
-							currentState.m_vertexBuffer = state.m_vertexBuffer;
-							currentState.m_instanceDataBuffer.idx = state.m_instanceDataBuffer.idx;
-							currentState.m_instanceDataOffset = state.m_instanceDataOffset;
-							currentState.m_instanceDataStride = state.m_instanceDataStride;
+							currentState.m_vertexBuffer = draw.m_vertexBuffer;
+							currentState.m_instanceDataBuffer.idx = draw.m_instanceDataBuffer.idx;
+							currentState.m_instanceDataOffset = draw.m_instanceDataOffset;
+							currentState.m_instanceDataStride = draw.m_instanceDataStride;
 
 
-							uint16_t handle = state.m_vertexBuffer.idx;
+							uint16_t handle = draw.m_vertexBuffer.idx;
 							if (invalidHandle != handle)
 							if (invalidHandle != handle)
 							{
 							{
-								VertexBufferGL& vb = s_renderGL->m_vertexBuffers[handle];
+								VertexBufferGL& vb = m_vertexBuffers[handle];
 								GL_CHECK(glBindBuffer(GL_ARRAY_BUFFER, vb.m_id) );
 								GL_CHECK(glBindBuffer(GL_ARRAY_BUFFER, vb.m_id) );
 								bindAttribs = true;
 								bindAttribs = true;
 							}
 							}
@@ -4270,14 +4520,14 @@ namespace bgfx
 							}
 							}
 						}
 						}
 
 
-						if (currentState.m_indexBuffer.idx != state.m_indexBuffer.idx)
+						if (currentState.m_indexBuffer.idx != draw.m_indexBuffer.idx)
 						{
 						{
-							currentState.m_indexBuffer = state.m_indexBuffer;
+							currentState.m_indexBuffer = draw.m_indexBuffer;
 
 
-							uint16_t handle = state.m_indexBuffer.idx;
+							uint16_t handle = draw.m_indexBuffer.idx;
 							if (invalidHandle != handle)
 							if (invalidHandle != handle)
 							{
 							{
-								IndexBufferGL& ib = s_renderGL->m_indexBuffers[handle];
+								IndexBufferGL& ib = m_indexBuffers[handle];
 								GL_CHECK(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib.m_id) );
 								GL_CHECK(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib.m_id) );
 							}
 							}
 							else
 							else
@@ -4288,19 +4538,19 @@ namespace bgfx
 
 
 						if (isValid(currentState.m_vertexBuffer) )
 						if (isValid(currentState.m_vertexBuffer) )
 						{
 						{
-							if (baseVertex != state.m_startVertex
+							if (baseVertex != draw.m_startVertex
 							||  bindAttribs)
 							||  bindAttribs)
 							{
 							{
-								baseVertex = state.m_startVertex;
-								const VertexBufferGL& vb = s_renderGL->m_vertexBuffers[state.m_vertexBuffer.idx];
-								uint16_t decl = !isValid(vb.m_decl) ? state.m_vertexDecl.idx : vb.m_decl.idx;
-								const ProgramGL& program = s_renderGL->m_program[programIdx];
-								program.bindAttributes(s_renderGL->m_vertexDecls[decl], state.m_startVertex);
+								baseVertex = draw.m_startVertex;
+								const VertexBufferGL& vb = m_vertexBuffers[draw.m_vertexBuffer.idx];
+								uint16_t decl = !isValid(vb.m_decl) ? draw.m_vertexDecl.idx : vb.m_decl.idx;
+								const ProgramGL& program = m_program[programIdx];
+								program.bindAttributes(m_vertexDecls[decl], draw.m_startVertex);
 
 
-								if (isValid(state.m_instanceDataBuffer) )
+								if (isValid(draw.m_instanceDataBuffer) )
 								{
 								{
-									GL_CHECK(glBindBuffer(GL_ARRAY_BUFFER, s_renderGL->m_vertexBuffers[state.m_instanceDataBuffer.idx].m_id) );
-									program.bindInstanceData(state.m_instanceDataStride, state.m_instanceDataOffset);
+									GL_CHECK(glBindBuffer(GL_ARRAY_BUFFER, m_vertexBuffers[draw.m_instanceDataBuffer.idx].m_id) );
+									program.bindInstanceData(draw.m_instanceDataStride, draw.m_instanceDataOffset);
 								}
 								}
 							}
 							}
 						}
 						}
@@ -4308,12 +4558,12 @@ namespace bgfx
 
 
 					if (isValid(currentState.m_vertexBuffer) )
 					if (isValid(currentState.m_vertexBuffer) )
 					{
 					{
-						uint32_t numVertices = state.m_numVertices;
+						uint32_t numVertices = draw.m_numVertices;
 						if (UINT32_MAX == numVertices)
 						if (UINT32_MAX == numVertices)
 						{
 						{
-							const VertexBufferGL& vb = s_renderGL->m_vertexBuffers[currentState.m_vertexBuffer.idx];
-							uint16_t decl = !isValid(vb.m_decl) ? state.m_vertexDecl.idx : vb.m_decl.idx;
-							const VertexDecl& vertexDecl = s_renderGL->m_vertexDecls[decl];
+							const VertexBufferGL& vb = m_vertexBuffers[currentState.m_vertexBuffer.idx];
+							uint16_t decl = !isValid(vb.m_decl) ? draw.m_vertexDecl.idx : vb.m_decl.idx;
+							const VertexDecl& vertexDecl = m_vertexDecls[decl];
 							numVertices = vb.m_size/vertexDecl.m_stride;
 							numVertices = vb.m_size/vertexDecl.m_stride;
 						}
 						}
 
 
@@ -4322,47 +4572,47 @@ namespace bgfx
 						uint32_t numInstances = 0;
 						uint32_t numInstances = 0;
 						uint32_t numPrimsRendered = 0;
 						uint32_t numPrimsRendered = 0;
 
 
-						if (isValid(state.m_indexBuffer) )
+						if (isValid(draw.m_indexBuffer) )
 						{
 						{
-							if (UINT32_MAX == state.m_numIndices)
+							if (UINT32_MAX == draw.m_numIndices)
 							{
 							{
-								numIndices = s_renderGL->m_indexBuffers[state.m_indexBuffer.idx].m_size/2;
+								numIndices = m_indexBuffers[draw.m_indexBuffer.idx].m_size/2;
 								numPrimsSubmitted = numIndices/prim.m_div - prim.m_sub;
 								numPrimsSubmitted = numIndices/prim.m_div - prim.m_sub;
-								numInstances = state.m_numInstances;
-								numPrimsRendered = numPrimsSubmitted*state.m_numInstances;
+								numInstances = draw.m_numInstances;
+								numPrimsRendered = numPrimsSubmitted*draw.m_numInstances;
 
 
 								GL_CHECK(glDrawElementsInstanced(prim.m_type
 								GL_CHECK(glDrawElementsInstanced(prim.m_type
 									, numIndices
 									, numIndices
 									, GL_UNSIGNED_SHORT
 									, GL_UNSIGNED_SHORT
 									, (void*)0
 									, (void*)0
-									, state.m_numInstances
+									, draw.m_numInstances
 									) );
 									) );
 							}
 							}
-							else if (prim.m_min <= state.m_numIndices)
+							else if (prim.m_min <= draw.m_numIndices)
 							{
 							{
-								numIndices = state.m_numIndices;
+								numIndices = draw.m_numIndices;
 								numPrimsSubmitted = numIndices/prim.m_div - prim.m_sub;
 								numPrimsSubmitted = numIndices/prim.m_div - prim.m_sub;
-								numInstances = state.m_numInstances;
-								numPrimsRendered = numPrimsSubmitted*state.m_numInstances;
+								numInstances = draw.m_numInstances;
+								numPrimsRendered = numPrimsSubmitted*draw.m_numInstances;
 
 
 								GL_CHECK(glDrawElementsInstanced(prim.m_type
 								GL_CHECK(glDrawElementsInstanced(prim.m_type
 									, numIndices
 									, numIndices
 									, GL_UNSIGNED_SHORT
 									, GL_UNSIGNED_SHORT
-									, (void*)(uintptr_t)(state.m_startIndex*2)
-									, state.m_numInstances
+									, (void*)(uintptr_t)(draw.m_startIndex*2)
+									, draw.m_numInstances
 									) );
 									) );
 							}
 							}
 						}
 						}
 						else
 						else
 						{
 						{
 							numPrimsSubmitted = numVertices/prim.m_div - prim.m_sub;
 							numPrimsSubmitted = numVertices/prim.m_div - prim.m_sub;
-							numInstances = state.m_numInstances;
-							numPrimsRendered = numPrimsSubmitted*state.m_numInstances;
+							numInstances = draw.m_numInstances;
+							numPrimsRendered = numPrimsSubmitted*draw.m_numInstances;
 
 
 							GL_CHECK(glDrawArraysInstanced(prim.m_type
 							GL_CHECK(glDrawArraysInstanced(prim.m_type
 								, 0
 								, 0
 								, numVertices
 								, numVertices
-								, state.m_numInstances
+								, draw.m_numInstances
 								) );
 								) );
 						}
 						}
 
 
@@ -4374,12 +4624,12 @@ namespace bgfx
 				}
 				}
 			}
 			}
 
 
-			s_renderGL->blitMsaaFbo();
+			blitMsaaFbo();
 
 
 			if (0 < _render->m_num)
 			if (0 < _render->m_num)
 			{
 			{
 				captureElapsed = -bx::getHPCounter();
 				captureElapsed = -bx::getHPCounter();
-				s_renderGL->capture();
+				capture();
 				captureElapsed += bx::getHPCounter();
 				captureElapsed += bx::getHPCounter();
 			}
 			}
 		}
 		}
@@ -4400,12 +4650,12 @@ namespace bgfx
 		{
 		{
 			double elapsedGpuMs = 0.0;
 			double elapsedGpuMs = 0.0;
 #if BGFX_CONFIG_RENDERER_OPENGL
 #if BGFX_CONFIG_RENDERER_OPENGL
-			s_renderGL->m_queries.end(GL_TIME_ELAPSED);
-			uint64_t elapsedGl = s_renderGL->m_queries.getResult(0);
+			m_queries.end(GL_TIME_ELAPSED);
+			uint64_t elapsedGl = m_queries.getResult(0);
 			elapsedGpuMs = double(elapsedGl)/1e6;
 			elapsedGpuMs = double(elapsedGl)/1e6;
 #endif // BGFX_CONFIG_RENDERER_OPENGL
 #endif // BGFX_CONFIG_RENDERER_OPENGL
 
 
-			TextVideoMem& tvm = s_renderGL->m_textVideoMem;
+			TextVideoMem& tvm = m_textVideoMem;
 
 
 			static int64_t next = now;
 			static int64_t next = now;
 
 
@@ -4420,10 +4670,10 @@ namespace bgfx
 				tvm.printf(0, pos++, BGFX_CONFIG_DEBUG ? 0x89 : 0x8f, " %s / " BX_COMPILER_NAME " / " BX_CPU_NAME " / " BX_ARCH_NAME " / " BX_PLATFORM_NAME " "
 				tvm.printf(0, pos++, BGFX_CONFIG_DEBUG ? 0x89 : 0x8f, " %s / " BX_COMPILER_NAME " / " BX_CPU_NAME " / " BX_ARCH_NAME " / " BX_PLATFORM_NAME " "
 					, getRendererName()
 					, getRendererName()
 					);
 					);
-				tvm.printf(0, pos++, 0x0f, "      Vendor: %s", s_renderGL->m_vendor);
-				tvm.printf(0, pos++, 0x0f, "    Renderer: %s", s_renderGL->m_renderer);
-				tvm.printf(0, pos++, 0x0f, "     Version: %s", s_renderGL->m_version);
-				tvm.printf(0, pos++, 0x0f, "GLSL version: %s", s_renderGL->m_glslVersion);
+				tvm.printf(0, pos++, 0x0f, "      Vendor: %s", m_vendor);
+				tvm.printf(0, pos++, 0x0f, "    Renderer: %s", m_renderer);
+				tvm.printf(0, pos++, 0x0f, "     Version: %s", m_version);
+				tvm.printf(0, pos++, 0x0f, "GLSL version: %s", m_glslVersion);
 
 
 				pos = 10;
 				pos = 10;
 				tvm.printf(10, pos++, 0x8e, "      Frame CPU: %7.3f, % 7.3f \x1f, % 7.3f \x1e [ms] / % 6.2f FPS "
 				tvm.printf(10, pos++, 0x8e, "      Frame CPU: %7.3f, % 7.3f \x1f, % 7.3f \x1e [ms] / % 6.2f FPS "

+ 80 - 0
src/renderer_gl.h

@@ -340,6 +340,86 @@ typedef uint64_t GLuint64;
 #	define GL_COMPUTE_SHADER 0x91B9
 #	define GL_COMPUTE_SHADER 0x91B9
 #endif // GL_COMPUTE_SHADER
 #endif // GL_COMPUTE_SHADER
 
 
+#ifndef GL_READ_ONLY
+#	define GL_READ_ONLY 0x88B8
+#endif // GL_READ_ONLY
+
+#ifndef GL_WRITE_ONLY
+#	define GL_WRITE_ONLY 0x88B9
+#endif // GL_WRITE_ONLY
+
+#ifndef GL_READ_WRITE
+#	define GL_READ_WRITE 0x88BA
+#endif // GL_READ_WRITE
+
+#ifndef GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT
+#	define GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT 0x00000001
+#endif // GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT
+
+#ifndef GL_ELEMENT_ARRAY_BARRIER_BIT
+#	define GL_ELEMENT_ARRAY_BARRIER_BIT 0x00000002
+#endif // GL_ELEMENT_ARRAY_BARRIER_BIT
+
+#ifndef GL_SHADER_IMAGE_ACCESS_BARRIER_BIT
+#	define GL_SHADER_IMAGE_ACCESS_BARRIER_BIT 0x00000020
+#endif // GL_SHADER_IMAGE_ACCESS_BARRIER_BIT
+
+#ifndef GL_SHADER_STORAGE_BARRIER_BIT
+#	define GL_SHADER_STORAGE_BARRIER_BIT 0x00002000
+#endif // GL_SHADER_STORAGE_BARRIER_BIT
+
+#ifndef GL_SHADER_STORAGE_BUFFER
+#	define GL_SHADER_STORAGE_BUFFER 0x90D2
+#endif // GL_SHADER_STORAGE_BUFFER
+
+#ifndef GL_IMAGE_1D
+#	define GL_IMAGE_1D 0x904C
+#endif // GL_IMAGE_1D
+
+#ifndef GL_IMAGE_2D
+#	define GL_IMAGE_2D 0x904D
+#endif // GL_IMAGE_2D
+
+#ifndef GL_IMAGE_3D
+#	define GL_IMAGE_3D 0x904E
+#endif // GL_IMAGE_3D
+
+#ifndef GL_IMAGE_CUBE
+#	define GL_IMAGE_CUBE 0x9050
+#endif // GL_IMAGE_CUBE
+
+#ifndef GL_PROGRAM_INPUT
+#	define GL_PROGRAM_INPUT 0x92E3
+#endif // GL_PROGRAM_INPUT
+
+#ifndef GL_ACTIVE_RESOURCES
+#	define GL_ACTIVE_RESOURCES 0x92F5
+#endif // GL_ACTIVE_RESOURCES
+
+#ifndef GL_UNIFORM
+#	define GL_UNIFORM 0x92E1
+#endif // GL_UNIFORM
+
+#ifndef GL_BUFFER_VARIABLE
+#	define GL_BUFFER_VARIABLE 0x92E5
+#endif // GL_BUFFER_VARIABLE
+
+#ifndef GL_UNSIGNED_INT_VEC2
+#	define GL_UNSIGNED_INT_VEC2 0x8DC6
+#endif // GL_UNSIGNED_INT_VEC2
+
+#ifndef GL_UNSIGNED_INT_VEC3
+#	define GL_UNSIGNED_INT_VEC3 0x8DC7
+#endif // GL_UNSIGNED_INT_VEC3
+
+#ifndef GL_UNSIGNED_INT_VEC4
+#	define GL_UNSIGNED_INT_VEC4 0x8DC8
+#endif // GL_UNSIGNED_INT_VEC4
+
+#ifndef GL_TYPE
+#	define GL_TYPE 0x92FA
+#endif // GL_TYPE
+
 #if BX_PLATFORM_NACL
 #if BX_PLATFORM_NACL
 #	include "glcontext_ppapi.h"
 #	include "glcontext_ppapi.h"
 #elif BX_PLATFORM_WINDOWS
 #elif BX_PLATFORM_WINDOWS

+ 227 - 45
tools/shaderc/shaderc.cpp

@@ -1651,6 +1651,7 @@ void addFragData(Preprocessor& _preprocessor, char* _data, uint32_t _idx, bool _
 // 4.1    410
 // 4.1    410
 // 4.2    420               11.0     vhdgf+c  5.0
 // 4.2    420               11.0     vhdgf+c  5.0
 // 4.3    430      vhdgf+c
 // 4.3    430      vhdgf+c
+// 4.4    440
 
 
 void help(const char* _error = NULL)
 void help(const char* _error = NULL)
 {
 {
@@ -1991,8 +1992,6 @@ int main(int _argc, const char* _argv[])
 			}
 			}
 		}
 		}
 
 
-BX_TRACE("1");
-
 		InOut shaderInputs;
 		InOut shaderInputs;
 		InOut shaderOutputs;
 		InOut shaderOutputs;
 		uint32_t inputHash = 0;
 		uint32_t inputHash = 0;
@@ -2056,75 +2055,255 @@ BX_TRACE("1");
 			}
 			}
 		}
 		}
 
 
-BX_TRACE("2");
-
 		if (raw)
 		if (raw)
 		{
 		{
+			bx::CrtFileWriter* writer = NULL;
+
+			if (NULL != bin2c)
+			{
+				writer = new Bin2cWriter(bin2c);
+			}
+			else
 			{
 			{
-				bx::CrtFileWriter* writer = NULL;
+				writer = new bx::CrtFileWriter;
+			}
 
 
-				if (NULL != bin2c)
-				{
-					writer = new Bin2cWriter(bin2c);
-				}
-				else
-				{
-					writer = new bx::CrtFileWriter;
-				}
+			if (0 != writer->open(outFilePath) )
+			{
+				fprintf(stderr, "Unable to open output file '%s'.", outFilePath);
+				return EXIT_FAILURE;
+			}
 
 
-				if (0 != writer->open(outFilePath) )
-				{
-					fprintf(stderr, "Unable to open output file '%s'.", outFilePath);
-					return EXIT_FAILURE;
-				}
+			uint32_t inputHash = 0;
+			uint32_t outputHash = 0;
 
 
-				uint32_t inputHash = 0;
-				uint32_t outputHash = 0;
+			if ('f' == shaderType)
+			{
+				bx::write(writer, BGFX_CHUNK_MAGIC_FSH);
+				bx::write(writer, inputHash);
+			}
+			else if ('v' == shaderType)
+			{
+				bx::write(writer, BGFX_CHUNK_MAGIC_VSH);
+				bx::write(writer, outputHash);
+			}
+			else
+			{
+				bx::write(writer, BGFX_CHUNK_MAGIC_CSH);
+				bx::write(writer, outputHash);
+			}
 
 
-				if ('f' == shaderType)
-				{
-					bx::write(writer, BGFX_CHUNK_MAGIC_FSH);
-					bx::write(writer, inputHash);
-				}
-				else if ('v' == shaderType)
+			if (glsl)
+			{
+				bx::write(writer, uint16_t(0) );
+
+				uint32_t shaderSize = (uint32_t)strlen(input);
+				bx::write(writer, shaderSize);
+				bx::write(writer, input, shaderSize);
+				bx::write(writer, uint8_t(0) );
+
+				compiled = true;
+			}
+			else
+			{
+				if (hlsl > 3)
 				{
 				{
-					bx::write(writer, BGFX_CHUNK_MAGIC_VSH);
-					bx::write(writer, outputHash);
+					compiled = compileHLSLShaderDx11(cmdLine, input, writer);
 				}
 				}
 				else
 				else
 				{
 				{
-					bx::write(writer, BGFX_CHUNK_MAGIC_CSH);
-					bx::write(writer, outputHash);
+					compiled = compileHLSLShaderDx9(cmdLine, input, writer);
 				}
 				}
+			}
 
 
+			writer->close();
+			delete writer;
+		}
+		else if ('c' == shaderType) // Compute
+		{
+			char* entry = strstr(input, "void main()");
+			if (NULL == entry)
+			{
+				fprintf(stderr, "Shader entry point 'void main()' is not found.\n");
+			}
+			else
+			{
 				if (glsl)
 				if (glsl)
 				{
 				{
-					bx::write(writer, uint16_t(0) );
-
-					uint32_t shaderSize = (uint32_t)strlen(input);
-					bx::write(writer, shaderSize);
-					bx::write(writer, input, shaderSize);
-					bx::write(writer, uint8_t(0) );
-
-					compiled = true;
 				}
 				}
 				else
 				else
 				{
 				{
-					if (hlsl > 3)
+					preprocessor.writef(
+						"#define lowp\n"
+						"#define mediump\n"
+						"#define highp\n"
+						"#define ivec2 int2\n"
+						"#define ivec3 int3\n"
+						"#define ivec4 int4\n"
+						"#define uvec2 uint2\n"
+						"#define uvec3 uint3\n"
+						"#define uvec4 uint4\n"
+						"#define vec2 float2\n"
+						"#define vec3 float3\n"
+						"#define vec4 float4\n"
+						"#define mat2 float2x2\n"
+						"#define mat3 float3x3\n"
+						"#define mat4 float4x4\n"
+						);
+
+					entry[4] = '_';
+
+					preprocessor.writef("#define void_main()");
+					preprocessor.writef(" \\\n\tvoid main(");
+
+					uint32_t arg = 0;
+
+					const bool hasLocalInvocationID    = NULL != strstr(input, "gl_LocalInvocationID");
+					const bool hasLocalInvocationIndex = NULL != strstr(input, "gl_LocalInvocationIndex");
+					const bool hasGlobalInvocationID   = NULL != strstr(input, "gl_GlobalInvocationID");
+					const bool hasWorkGroupID          = NULL != strstr(input, "gl_WorkGroupID");
+
+					if (hasLocalInvocationID)
+					{
+						preprocessor.writef(
+							" \\\n\t%sint3 gl_LocalInvocationID : SV_GroupThreadID"
+							, arg++ > 0 ? ", " : "  "
+							);
+					}
+
+					if (hasLocalInvocationIndex)
 					{
 					{
-						compiled = compileHLSLShaderDx11(cmdLine, preprocessor.m_preprocessed, writer);
+						preprocessor.writef(
+							" \\\n\t%sint gl_LocalInvocationIndex : SV_GroupIndex"
+							, arg++ > 0 ? ", " : "  "
+							);
 					}
 					}
-					else
+
+					if (hasGlobalInvocationID)
 					{
 					{
-						compiled = compileHLSLShaderDx9(cmdLine, preprocessor.m_preprocessed, writer);
+						preprocessor.writef(
+							" \\\n\t%sint3 gl_GlobalInvocationID : SV_DispatchThreadID"
+							, arg++ > 0 ? ", " : "  "
+							);
 					}
 					}
+
+					if (hasWorkGroupID)
+					{
+						preprocessor.writef(
+							" \\\n\t%sint3 gl_WorkGroupID : SV_GroupID"
+							, arg++ > 0 ? ", " : "  "
+							);
+					}
+
+					preprocessor.writef(
+						" \\\n\t)\n"
+						);
 				}
 				}
 
 
-				writer->close();
-				delete writer;
+				if (preprocessor.run(input) )
+				{
+					BX_TRACE("Input file: %s", filePath);
+					BX_TRACE("Output file: %s", outFilePath);
+
+					if (preprocessOnly)
+					{
+						bx::CrtFileWriter writer;
+
+						if (0 != writer.open(outFilePath) )
+						{
+							fprintf(stderr, "Unable to open output file '%s'.", outFilePath);
+							return EXIT_FAILURE;
+						}
+
+						writer.write(preprocessor.m_preprocessed.c_str(), (int32_t)preprocessor.m_preprocessed.size() );
+						writer.close();
+
+						return EXIT_SUCCESS;
+					}
+
+					{
+						bx::CrtFileWriter* writer = NULL;
+
+						if (NULL != bin2c)
+						{
+							writer = new Bin2cWriter(bin2c);
+						}
+						else
+						{
+							writer = new bx::CrtFileWriter;
+						}
+
+						if (0 != writer->open(outFilePath) )
+						{
+							fprintf(stderr, "Unable to open output file '%s'.", outFilePath);
+							return EXIT_FAILURE;
+						}
+
+						bx::write(writer, BGFX_CHUNK_MAGIC_CSH);
+						bx::write(writer, outputHash);
+
+						if (glsl)
+						{
+							std::string code;
+
+							if (gles)
+							{
+								bx::stringPrintf(code, "#version 310 es\n");
+							}
+							else
+							{
+								int32_t version = atoi(profile);
+								bx::stringPrintf(code, "#version %d\n", version == 0 ? 430 : version);
+							}
+
+							code += preprocessor.m_preprocessed;
+#if 1
+							bx::write(writer, uint16_t(0) );
+
+							uint32_t shaderSize = (uint32_t)code.size();
+							bx::write(writer, shaderSize);
+							bx::write(writer, code.c_str(), shaderSize);
+							bx::write(writer, uint8_t(0) );
+
+							compiled = true;
+#else
+							compiled = compileGLSLShader(cmdLine, gles, code, writer);
+#endif // 0
+						}
+						else
+						{
+							if (hlsl > 3)
+							{
+								compiled = compileHLSLShaderDx11(cmdLine, preprocessor.m_preprocessed, writer);
+							}
+							else
+							{
+								compiled = compileHLSLShaderDx9(cmdLine, preprocessor.m_preprocessed, writer);
+							}
+						}
+
+						writer->close();
+						delete writer;
+					}
+
+					if (compiled)
+					{
+						if (depends)
+						{
+							std::string ofp = outFilePath;
+							ofp += ".d";
+							bx::CrtFileWriter writer;
+							if (0 == writer.open(ofp.c_str() ) )
+							{
+								writef(&writer, "%s : %s\n", outFilePath, preprocessor.m_depends.c_str() );
+								writer.close();
+							}
+						}
+					}
+				}
 			}
 			}
 		}
 		}
-		else
+		else // Vertex/Fragment
 		{
 		{
 			char* entry = strstr(input, "void main()");
 			char* entry = strstr(input, "void main()");
 			if (NULL == entry)
 			if (NULL == entry)
@@ -2204,6 +2383,9 @@ BX_TRACE("2");
 						"#define ivec2 int2\n"
 						"#define ivec2 int2\n"
 						"#define ivec3 int3\n"
 						"#define ivec3 int3\n"
 						"#define ivec4 int4\n"
 						"#define ivec4 int4\n"
+						"#define uvec2 uint2\n"
+						"#define uvec3 uint3\n"
+						"#define uvec4 uint4\n"
 						"#define vec2 float2\n"
 						"#define vec2 float2\n"
 						"#define vec3 float3\n"
 						"#define vec3 float3\n"
 						"#define vec4 float4\n"
 						"#define vec4 float4\n"