2
0
Эх сурвалжийг харах

Uniform buffer actually work

Marko Pintera 13 жил өмнө
parent
commit
6d8e1b9520

+ 4 - 3
CamelotClient/CamelotClient.cpp

@@ -96,20 +96,21 @@ int _tmain(int argc, _TCHAR* argv[])
 	//vertProg =  HighLevelGpuProgram::create(vertShaderCode, "vs_main", "cg", GPT_VERTEX_PROGRAM, GPP_VS_2_0);
 
 	///////////////// GLSL SHADERS ////////////////////////////
-	String fragShaderCode = " #version 330 \n \
+	String fragShaderCode = " #version 400 \n \
+							  uniform mainFragBlock { vec4 solidcolor; }; \
 							  uniform sampler2D tex; \
 							  in vec2 texcoord0; \
 							  out vec4 fragColor; \
 							  void main() \
 							  {\
 								  vec4 texColor = texture2D(tex, texcoord0.st);\
-								  fragColor = texColor; \
+								  fragColor = solidcolor; \
 							  }";
 
 	fragProg = HighLevelGpuProgram::create(fragShaderCode, "main", "glsl", GPT_FRAGMENT_PROGRAM, GPP_PS_2_0);
 
 	// TODO - Make sure to document the strict input parameter naming. (Exact supported names are in GLSLParamParser)
-	String vertShaderCode = "#version 330 \n \
+	String vertShaderCode = "#version 400 \n \
 							 uniform mat4 matViewProjection; \
 							 in vec4 cm_position; \
 							 in vec2 cm_texcoord0; \

+ 33 - 33
CamelotD3D11RenderSystem/Include/CmD3D11VertexDeclaration.h

@@ -5,57 +5,57 @@
 
 namespace CamelotEngine
 {
-	class D3D11VertexDeclaration : public VertexDeclaration
-	{
-	protected:
-		mutable bool mNeedsRebuild;
-
-		map<D3D11Device*, ID3D11InputLayout*>::type mLayoutPerDevice;
-	public:
-		D3D11VertexDeclaration();
-		~D3D11VertexDeclaration();
-
+	class D3D11VertexDeclaration : public VertexDeclaration
+	{
+	protected:
+		mutable bool mNeedsRebuild;
+
+		map<D3D11Device*, ID3D11InputLayout*>::type mLayoutPerDevice;
+	public:
+		D3D11VertexDeclaration();
+		~D3D11VertexDeclaration();
+
 		/**
 		 * @copydoc VertexDeclaration::addElement
-		 */
-		const VertexElement& addElement(unsigned short source, UINT32 offset, VertexElementType theType,
-			VertexElementSemantic semantic, unsigned short index = 0);
-
+		 */
+		const VertexElement& addElement(unsigned short source, UINT32 offset, VertexElementType theType,
+			VertexElementSemantic semantic, unsigned short index = 0);
+
 		/**
 		 * @copydoc VertexDeclaration::insertElement
-		 */
-		const VertexElement& insertElement(unsigned short atPosition,
-			unsigned short source, UINT32 offset, VertexElementType theType,
-			VertexElementSemantic semantic, unsigned short index = 0);
-
+		 */
+		const VertexElement& insertElement(unsigned short atPosition,
+			unsigned short source, UINT32 offset, VertexElementType theType,
+			VertexElementSemantic semantic, unsigned short index = 0);
+
 		/**
 		 * @copydoc VertexDeclaration::removeElement(unsigned short)
 		 */
-		void removeElement(unsigned short elem_index);
-
+		void removeElement(unsigned short elem_index);
+
 		/**
 		 * @copydoc VertexDeclaration::removeElement(VertexElementSemantic, unsigned short)
-		 */
-		void removeElement(VertexElementSemantic semantic, unsigned short index = 0);
-
+		 */
+		void removeElement(VertexElementSemantic semantic, unsigned short index = 0);
+
 		/**
 		 * @copydoc VertexDeclaration::removeAllElements
-		 */
-		void removeAllElements(void);
-
+		 */
+		void removeAllElements(void);
+
 		/**
 		 * @copydoc VertexDeclaration::modifyElement
-		 */
-		void modifyElement(unsigned short elem_index, unsigned short source, UINT32 offset, VertexElementType theType,
-			VertexElementSemantic semantic, unsigned short index = 0);
-
+		 */
+		void modifyElement(unsigned short elem_index, unsigned short source, UINT32 offset, VertexElementType theType,
+			VertexElementSemantic semantic, unsigned short index = 0);
+
 		/**
 		 * @brief	Gets the D3D11 input layout.
 		 * 			
 		 * @note	Recreates the layout if it was previously modified or if this is the first time
 		 * 			this method is called.
 		 */
-		ID3D11InputLayout* getD3DLayout(D3D11Device& device, D3D11HLSLProgram& programToBindTo);
-	};
+		ID3D11InputLayout* getD3DLayout(D3D11Device& device, D3D11HLSLProgram& programToBindTo);
+	};
 
 }

+ 1 - 0
CamelotForwardRenderer/Source/CmForwardRenderer.cpp

@@ -71,6 +71,7 @@ namespace CamelotEngine
 			// (probably at an earlier stage). e.g. I want the user to be warned if the shader doesn't contain matViewProjection param
 			// (or should we just ignore such missing parameters?)
 			material->setMat4("matViewProjection", viewProjMatrix);
+			material->setColor("solidcolor", Color::Red);
 
 			for(UINT32 i = 0; i < material->getNumPasses(); i++)
 			{

+ 8 - 3
CamelotGLRenderer/Include/CmGLGpuParamBlock.h

@@ -7,6 +7,12 @@ namespace CamelotEngine
 {
 	class CM_RSGL_EXPORT GLGpuParamBlock : public GpuParamBlock
 	{
+	private:
+		struct GLGpuParamBlockSharedData
+		{
+			GLuint mGLHandle;
+		};
+
 	public:
 		GLGpuParamBlock(const GpuParamBlockDesc& desc);
 		~GLGpuParamBlock();
@@ -14,10 +20,9 @@ namespace CamelotEngine
 		virtual void updateIfDirty();
 		virtual GpuParamBlockPtr clone() const;
 
-		GLuint getGLHandle() const { return mGLHandle; }
+		GLuint getGLHandle() const { return mGLSharedData->mGLHandle; }
 
 	private:
-		GLuint mGLHandle;
-		bool mBufferInitialized;
+		GLGpuParamBlockSharedData* mGLSharedData;
 	};
 }

+ 14 - 0
CamelotGLRenderer/Include/CmGLRenderSystem.h

@@ -259,6 +259,13 @@ namespace CamelotEngine {
 		UINT32 mVertexTexOffset;
 		UINT32 mGeometryTexOffset;
 
+		UINT32 mFragmentUBOffset;
+		UINT32 mVertexUBOffset;
+		UINT32 mGeometryUBOffset;
+		UINT32 mHullUBOffset;
+		UINT32 mDomainUBOffset;
+		UINT32 mComputeUBOffset;
+
 		/* The main GL context - main thread only */
         GLContext *mMainContext;
         /* The current GL context  - main thread only */
@@ -520,6 +527,13 @@ namespace CamelotEngine {
 		 */
 		UINT32 getGLTextureUnit(GpuProgramType gptype, UINT32 unit);
 
+		/**
+		 * @brief	OpenGL shares all buffer bindings, but the engine prefers to keep buffers
+		 * 			separate per-stage. This converts a block buffer binding that is set per stage
+		 * 			into a global block buffer binding usable by OpenGL.
+		 */
+		UINT32 getGLUniformBlockBinding(GpuProgramType gptype, UINT32 binding);
+
 		void setActiveProgram(GpuProgramType gptype, GLSLGpuProgram* program);
 		GLSLGpuProgram* getActiveProgram(GpuProgramType gptype) const;
 

+ 23 - 12
CamelotGLRenderer/Source/CmGLGpuParamBlock.cpp

@@ -3,28 +3,37 @@
 namespace CamelotEngine
 {
 	GLGpuParamBlock::GLGpuParamBlock(const GpuParamBlockDesc& desc)
-		:GpuParamBlock(desc), mBufferInitialized(false), mGLHandle(0)
+		:GpuParamBlock(desc), mGLSharedData(nullptr)
 	{
-
+		mGLSharedData = new GLGpuParamBlockSharedData();
 	}
 
 	GLGpuParamBlock::~GLGpuParamBlock()
 	{
-
+		if(mOwnsSharedData)
+		{
+			glDeleteBuffers(1, &mGLSharedData->mGLHandle);
+			delete mGLSharedData;
+		}
 	}
 
 	void GLGpuParamBlock::updateIfDirty()
 	{
-		if(mDirty)
+		if(!sharedData->mInitialized)
+		{
+			glGenBuffers(1, &mGLSharedData->mGLHandle);
+			glBindBuffer(GL_UNIFORM_BUFFER, mGLSharedData->mGLHandle);
+			glBufferData(GL_UNIFORM_BUFFER, mSize, (GLvoid*)mData, GL_DYNAMIC_DRAW);
+			glBindBuffer(GL_UNIFORM_BUFFER, 0);
+
+			sharedData->mInitialized = true;
+		}
+
+		if(sharedData->mDirty)
 		{
-			if(!mBufferInitialized)
-			{
-				glGenBuffers(1, &mGLHandle);
-				glBindBuffer(GL_UNIFORM_BUFFER, mGLHandle);
-				glBufferData(mGLHandle, mSize, mData, GL_WRITE_ONLY);
-			}
-			else
-				glBufferSubData(GL_UNIFORM_BUFFER, 0 , mSize, mData);
+			glBindBuffer(GL_UNIFORM_BUFFER, mGLSharedData->mGLHandle);
+			glBufferSubData(GL_UNIFORM_BUFFER, 0 , mSize, mData);
+			glBindBuffer(GL_UNIFORM_BUFFER, 0);
 		}
 
 		GpuParamBlock::updateIfDirty();
@@ -34,6 +43,8 @@ namespace CamelotEngine
 	{
 		std::shared_ptr<GLGpuParamBlock> clonedParamBlock(new GLGpuParamBlock(*this));
 		clonedParamBlock->mData = new UINT8[mSize];
+		clonedParamBlock->mOwnsSharedData = false;
+		clonedParamBlock->mGLSharedData = mGLSharedData;
 		memcpy(clonedParamBlock->mData, mData, mSize);
 
 		return clonedParamBlock;

+ 177 - 81
CamelotGLRenderer/Source/CmGLRenderSystem.cpp

@@ -49,6 +49,7 @@ THE SOFTWARE.s
 #include "CmGLRenderWindowManager.h"
 #include "CmGLSLProgramPipelineManager.h"
 #include "CmGpuParams.h"
+#include "CmGLGpuParamBlock.h"
 #include "CmDebug.h"
 
 #if CM_DEBUG_MODE
@@ -86,7 +87,13 @@ namespace CamelotEngine
 		mFragmentTexOffset(0),
 		mVertexTexOffset(0),
 		mGeometryTexOffset(0),
-		mTextureTypes(nullptr)
+		mTextureTypes(nullptr),
+		mFragmentUBOffset(0),
+		mVertexUBOffset(0),
+		mGeometryUBOffset(0),
+		mHullUBOffset(0),
+		mDomainUBOffset(0),
+		mComputeUBOffset(0)
 	{
 		// Get our GLSupport
 		mGLSupport = CamelotEngine::getGLSupport();
@@ -311,90 +318,103 @@ namespace CamelotEngine
 			texUnit++;
 		}
 
+		UINT32 blockBinding = 0;
+		for(auto iter = paramDesc.paramBlocks.begin(); iter != paramDesc.paramBlocks.end(); ++iter)
+		{
+			if(iter->second.slot == 0)
+				continue;
+
+			GpuParamBlockPtr paramBlock = params->getParamBlock(iter->second.slot);
+			if(paramBlock == nullptr)
+				continue;
+
+			GLGpuParamBlockPtr glParamBlock = std::static_pointer_cast<GLGpuParamBlock>(paramBlock);
+
+			UINT32 globalBlockBinding = getGLUniformBlockBinding(gptype, blockBinding);
+			glUniformBlockBinding(glProgram, iter->second.slot - 1, globalBlockBinding);
+			glBindBufferRange(GL_UNIFORM_BUFFER, globalBlockBinding, glParamBlock->getGLHandle(), 0, glParamBlock->getSize());
+
+			blockBinding++;
+		}
+
 		for(auto iter = paramDesc.params.begin(); iter != paramDesc.params.end(); ++iter)
 		{
 			const GpuParamMemberDesc& paramDesc = iter->second;
 
 			GpuParamBlockPtr paramBlock = params->getParamBlock(paramDesc.paramBlockSlot);
 			
-			if(paramDesc.paramBlockSlot == 0) // 0 means uniforms are not in a block
-			{
-				const UINT8* ptrData = paramBlock->getDataPtr(paramDesc.cpuMemOffset * sizeof(UINT32));
-
-				switch(paramDesc.type)
-				{
-				case GCT_FLOAT1:
-					glProgramUniform1fv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, (GLfloat*)ptrData);
-					break;
-				case GCT_FLOAT2:
-					glProgramUniform2fv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, (GLfloat*)ptrData);
-					break;
-				case GCT_FLOAT3:
-					glProgramUniform3fv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, (GLfloat*)ptrData);
-					break;
-				case GCT_FLOAT4:
-					glProgramUniform4fv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, (GLfloat*)ptrData);
-					break;
-				case GCT_MATRIX_2X2:
-					glProgramUniformMatrix2fv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, 
-						GL_TRUE, (GLfloat*)ptrData);
-					break;
-				case GCT_MATRIX_2X3:
-					glProgramUniformMatrix2x3fv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, 
-						GL_TRUE, (GLfloat*)ptrData);
-					break;
-				case GCT_MATRIX_2X4:
-					glProgramUniformMatrix2x4fv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, 
-						GL_TRUE, (GLfloat*)ptrData);
-					break;
-				case GCT_MATRIX_3X2:
-					glProgramUniformMatrix3x2fv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, 
-						GL_TRUE, (GLfloat*)ptrData);
-					break;
-				case GCT_MATRIX_3X3:
-					glProgramUniformMatrix3fv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, 
-						GL_TRUE, (GLfloat*)ptrData);
-					break;
-				case GCT_MATRIX_3X4:
-					glProgramUniformMatrix3x4fv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, 
-						GL_TRUE, (GLfloat*)ptrData);
-					break;
-				case GCT_MATRIX_4X2:
-					glProgramUniformMatrix4x2fv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, 
-						GL_TRUE, (GLfloat*)ptrData);
-					break;
-				case GCT_MATRIX_4X3:
-					glProgramUniformMatrix4x3fv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, 
-						GL_TRUE, (GLfloat*)ptrData);
-					break;
-				case GCT_MATRIX_4X4:
-					glProgramUniformMatrix4fv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, 
-						GL_TRUE, (GLfloat*)ptrData);
-					break;
-				case GCT_INT1:
-					glProgramUniform1iv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, (GLint*)ptrData);
-					break;
-				case GCT_INT2:
-					glProgramUniform2iv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, (GLint*)ptrData);
-					break;
-				case GCT_INT3:
-					glProgramUniform3iv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, (GLint*)ptrData);
-					break;
-				case GCT_INT4:
-					glProgramUniform4iv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, (GLint*)ptrData);
-					break;
-				case GMT_BOOL:
-					glProgramUniform1uiv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, (GLuint*)ptrData);
-					break;
-				case GCT_UNKNOWN:
-					break;
-				}
-			}
-			else
-			{
+			if(paramDesc.paramBlockSlot != 0) // 0 means uniforms are not in a block
+				continue;
 
+			const UINT8* ptrData = paramBlock->getDataPtr(paramDesc.cpuMemOffset * sizeof(UINT32));
 
-				CM_EXCEPT(NotImplementedException, "Support for uniform blocks not implemented yet");
+			switch(paramDesc.type)
+			{
+			case GCT_FLOAT1:
+				glProgramUniform1fv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, (GLfloat*)ptrData);
+				break;
+			case GCT_FLOAT2:
+				glProgramUniform2fv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, (GLfloat*)ptrData);
+				break;
+			case GCT_FLOAT3:
+				glProgramUniform3fv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, (GLfloat*)ptrData);
+				break;
+			case GCT_FLOAT4:
+				glProgramUniform4fv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, (GLfloat*)ptrData);
+				break;
+			case GCT_MATRIX_2X2:
+				glProgramUniformMatrix2fv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, 
+					GL_TRUE, (GLfloat*)ptrData);
+				break;
+			case GCT_MATRIX_2X3:
+				glProgramUniformMatrix2x3fv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, 
+					GL_TRUE, (GLfloat*)ptrData);
+				break;
+			case GCT_MATRIX_2X4:
+				glProgramUniformMatrix2x4fv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, 
+					GL_TRUE, (GLfloat*)ptrData);
+				break;
+			case GCT_MATRIX_3X2:
+				glProgramUniformMatrix3x2fv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, 
+					GL_TRUE, (GLfloat*)ptrData);
+				break;
+			case GCT_MATRIX_3X3:
+				glProgramUniformMatrix3fv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, 
+					GL_TRUE, (GLfloat*)ptrData);
+				break;
+			case GCT_MATRIX_3X4:
+				glProgramUniformMatrix3x4fv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, 
+					GL_TRUE, (GLfloat*)ptrData);
+				break;
+			case GCT_MATRIX_4X2:
+				glProgramUniformMatrix4x2fv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, 
+					GL_TRUE, (GLfloat*)ptrData);
+				break;
+			case GCT_MATRIX_4X3:
+				glProgramUniformMatrix4x3fv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, 
+					GL_TRUE, (GLfloat*)ptrData);
+				break;
+			case GCT_MATRIX_4X4:
+				glProgramUniformMatrix4fv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, 
+					GL_TRUE, (GLfloat*)ptrData);
+				break;
+			case GCT_INT1:
+				glProgramUniform1iv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, (GLint*)ptrData);
+				break;
+			case GCT_INT2:
+				glProgramUniform2iv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, (GLint*)ptrData);
+				break;
+			case GCT_INT3:
+				glProgramUniform3iv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, (GLint*)ptrData);
+				break;
+			case GCT_INT4:
+				glProgramUniform4iv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, (GLint*)ptrData);
+				break;
+			case GMT_BOOL:
+				glProgramUniform1uiv(glProgram, paramDesc.gpuMemOffset, paramDesc.arraySize, (GLuint*)ptrData);
+				break;
+			case GCT_UNKNOWN:
+				break;
 			}
 		}
 	}
@@ -1788,13 +1808,38 @@ namespace CamelotEngine
 
 		UINT16 numCombinedTexUnits = caps->getNumCombinedTextureUnits();
 
-		if((mFragmentTexOffset + mVertexTexOffset + mGeometryTexOffset) > numCombinedTexUnits)
+		UINT32 totalNumTexUnits = caps->getNumTextureUnits(GPT_VERTEX_PROGRAM);
+		totalNumTexUnits += caps->getNumTextureUnits(GPT_FRAGMENT_PROGRAM);
+		totalNumTexUnits += caps->getNumTextureUnits(GPT_GEOMETRY_PROGRAM);
+		totalNumTexUnits += caps->getNumTextureUnits(GPT_HULL_PROGRAM);
+		totalNumTexUnits += caps->getNumTextureUnits(GPT_DOMAIN_PROGRAM);
+		totalNumTexUnits += caps->getNumTextureUnits(GPT_COMPUTE_PROGRAM);
+
+		if(totalNumTexUnits > numCombinedTexUnits)
 			CM_EXCEPT(InternalErrorException, "Number of combined texture units less than the number of individual units!?");
 
 		mTextureTypes = new GLenum[numCombinedTexUnits];
 		for(UINT16 i = 0; i < numCombinedTexUnits; i++)
 			mTextureTypes[i] = 0;
 
+		mVertexUBOffset = 0;
+		UINT32 totalNumUniformBlocks = caps->getNumUniformBlockBuffers(GPT_VERTEX_PROGRAM);
+		mFragmentUBOffset = totalNumUniformBlocks;
+		totalNumUniformBlocks += caps->getNumUniformBlockBuffers(GPT_FRAGMENT_PROGRAM);
+		mGeometryUBOffset = totalNumUniformBlocks;
+		totalNumUniformBlocks += caps->getNumUniformBlockBuffers(GPT_GEOMETRY_PROGRAM);
+		mHullUBOffset = totalNumUniformBlocks;
+		totalNumUniformBlocks += caps->getNumUniformBlockBuffers(GPT_HULL_PROGRAM);
+		mDomainUBOffset = totalNumUniformBlocks;
+		totalNumUniformBlocks += caps->getNumUniformBlockBuffers(GPT_DOMAIN_PROGRAM);
+		mComputeUBOffset = totalNumUniformBlocks;
+		totalNumUniformBlocks += caps->getNumUniformBlockBuffers(GPT_COMPUTE_PROGRAM);
+
+		UINT16 numCombinedUniformBlocks = caps->getNumCombinedUniformBlockBuffers();
+
+		if(totalNumUniformBlocks > numCombinedUniformBlocks)
+			CM_EXCEPT(InternalErrorException, "Number of combined uniform block buffers less than the number of individual per-stage buffers!?");
+
 		/// Create the texture manager        
 		TextureManager::startUp(new GLTextureManager(*mGLSupport)); 
 
@@ -2101,31 +2146,54 @@ namespace CamelotEngine
 			rsc->setCapability(RSC_VERTEX_TEXTURE_FETCH);
 		}
 
-		if (mGLSupport->checkExtension("ARB_geometry_shader4"))
+		GLint numUniformBlocks;
+		glGetIntegerv(GL_MAX_VERTEX_UNIFORM_BLOCKS, &numUniformBlocks);
+		rsc->setNumUniformBlockBuffers(GPT_VERTEX_PROGRAM, numUniformBlocks);
+
+		glGetIntegerv(GL_MAX_FRAGMENT_UNIFORM_BLOCKS, &numUniformBlocks);
+		rsc->setNumUniformBlockBuffers(GPT_FRAGMENT_PROGRAM, numUniformBlocks);
+
+		if (mGLSupport->checkExtension("GL_ARB_geometry_shader4"))
 		{
 			GLint geomUnits;
 			glGetIntegerv(GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, &geomUnits);
 			rsc->setNumTextureUnits(GPT_GEOMETRY_PROGRAM, static_cast<UINT16>(geomUnits));
+
+			glGetIntegerv(GL_MAX_GEOMETRY_UNIFORM_BLOCKS, &numUniformBlocks);
+			rsc->setNumUniformBlockBuffers(GPT_GEOMETRY_PROGRAM, numUniformBlocks);
 		}
 
-		if (mGLSupport->checkExtension("ARB_tessellation_shader"))
+		if (mGLSupport->checkExtension("GL_ARB_tessellation_shader"))
 		{
 			rsc->setCapability(RSC_TESSELLATION_PROGRAM);
+
+			glGetIntegerv(GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, &numUniformBlocks);
+			rsc->setNumUniformBlockBuffers(GPT_HULL_PROGRAM, numUniformBlocks);
+
+			glGetIntegerv(GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, &numUniformBlocks);
+			rsc->setNumUniformBlockBuffers(GPT_DOMAIN_PROGRAM, numUniformBlocks);
 		}
 
-		if (mGLSupport->checkExtension("ARB_compute_shader")) // Enable once I include GL 4.3
+		if (mGLSupport->checkExtension("GL_ARB_compute_shader")) // Enable once I include GL 4.3
 		{
 			//rsc->setCapability(RSC_COMPUTE_PROGRAM);
 
 			//GLint computeUnits;
 			//glGetIntegerv(GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS, &computeUnits);
 			//rsc->setNumTextureUnits(GPT_COMPUTE_PROGRAM, static_cast<UINT16>(computeUnits));
+			
+			//glGetIntegerv(GL_MAX_COMPUTE_UNIFORM_BLOCKS, &numUniformBlocks);
+			//rsc->setNumUniformBlockBuffers(GPT_COMPUTE_PROGRAM, numUniformBlocks);
 		}
 
 		GLint combinedTexUnits;
 		glGetIntegerv(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS, &combinedTexUnits);
 		rsc->setNumCombinedTextureUnits(static_cast<UINT16>(combinedTexUnits));
 
+		GLint combinedUniformBlockUnits;
+		glGetIntegerv(GL_MAX_COMBINED_UNIFORM_BLOCKS, &combinedUniformBlockUnits);
+		rsc->setNumCombinedUniformBlockBuffers(static_cast<UINT16>(combinedUniformBlockUnits));
+
 		// Mipmap LOD biasing
 		rsc->setCapability(RSC_MIPMAP_LOD_BIAS);
 
@@ -2172,6 +2240,34 @@ namespace CamelotEngine
 			CM_EXCEPT(InternalErrorException, "Invalid program type: " + toString(gptype));
 		}
 	}
+	//---------------------------------------------------------------------
+	UINT32 GLRenderSystem::getGLUniformBlockBinding(GpuProgramType gptype, UINT32 binding)
+	{
+		UINT32 maxNumBindings = mCurrentCapabilities->getNumUniformBlockBuffers(gptype);
+		if(binding < 0 || binding >= maxNumBindings)
+		{
+			CM_EXCEPT(InvalidParametersException, "Invalid buffer binding for the provided stage. Buffer binding: " + toString(binding) + ". Stage: " + 
+				toString(gptype) + ". Supported range is 0 .. " + toString(maxNumBindings - 1));
+		}
+
+		switch(gptype)
+		{
+		case GPT_FRAGMENT_PROGRAM:
+			return mFragmentUBOffset + binding;
+		case GPT_VERTEX_PROGRAM:
+			return mVertexUBOffset + binding;
+		case GPT_GEOMETRY_PROGRAM:
+			return mGeometryUBOffset + binding;
+		case GPT_HULL_PROGRAM:
+			return mHullUBOffset + binding;
+		case GPT_DOMAIN_PROGRAM:
+			return mDomainUBOffset + binding;
+		case GPT_COMPUTE_PROGRAM:
+			return mComputeUBOffset + binding;
+		default:
+			CM_EXCEPT(InternalErrorException, "Invalid program type: " + toString(gptype));
+		}
+	}
 
 	void GLRenderSystem::setActiveProgram(GpuProgramType gptype, GLSLGpuProgram* program)
 	{

+ 4 - 4
CamelotGLRenderer/Source/CmWin32GLSupport.cpp

@@ -117,11 +117,11 @@ namespace CamelotEngine
 #endif
 
 		// Check for W32 specific extensions probe function
-		PFNWGLGETEXTENSIONSSTRINGARBPROC _wglGetExtensionsStringARB = 
-			(PFNWGLGETEXTENSIONSSTRINGARBPROC)wglGetProcAddress("wglGetExtensionsStringARB");
-		if(!_wglGetExtensionsStringARB)
+		PFNWGLGETEXTENSIONSSTRINGARBPROC _wglGetExtensionsString = 
+			(PFNWGLGETEXTENSIONSSTRINGARBPROC)wglGetProcAddress("wglGetExtensionsString");
+		if(!_wglGetExtensionsString)
 			return;
-		const char *wgl_extensions = _wglGetExtensionsStringARB(mInitialWindow->getHDC());
+		const char *wgl_extensions = _wglGetExtensionsString(mInitialWindow->getHDC());
 
 		// Parse them, and add them to the main list
 		StringStream ext;

+ 10 - 1
CamelotGLRenderer/Source/GLSL/include/CmGLSLParamParser.h

@@ -265,7 +265,13 @@ namespace CamelotEngine
 
 					GLint arrayStride;
 					glGetActiveUniformsiv(glProgram, 1, &index, GL_UNIFORM_ARRAY_STRIDE, &arrayStride);
-					gpuParam.elementSize = arrayStride;
+
+					if(arrayStride != 0)
+					{
+						assert (arrayStride % 4 == 0);
+
+						gpuParam.elementSize = arrayStride / 4;
+					}
 
 					gpuParam.paramBlockSlot = blockIndex + 1; // 0 is reserved for globals
 
@@ -298,6 +304,9 @@ namespace CamelotEngine
 			GLint blockSize = 0;
 			glGetActiveUniformBlockiv(glProgram, iter->second.slot - 1, GL_UNIFORM_BLOCK_DATA_SIZE, &blockSize);
 
+			assert (blockSize % 4 == 0);
+			blockSize = blockSize / 4;
+
 			if(iter->second.blockSize != blockSize)
 				CM_EXCEPT(InternalErrorException, "OpenGL specified and manual uniform block buffer sizes don't match!");
 		}

+ 10 - 1
CamelotRenderer/Include/CmGpuParamBlock.h

@@ -6,6 +6,13 @@ namespace CamelotEngine
 {
 	class CM_EXPORT GpuParamBlock
 	{
+	private:
+		struct GpuParamBlockSharedData
+		{
+			bool mDirty;
+			bool mInitialized;
+		};
+
 	public:
 		GpuParamBlock(const GpuParamBlockDesc& desc);
 		virtual ~GpuParamBlock();
@@ -14,6 +21,7 @@ namespace CamelotEngine
 		void zeroOut(UINT32 offset, UINT32 size);
 
 		const UINT8* getDataPtr(UINT32 offset) const;
+		UINT32 getSize() const { return mSize; }
 
 		virtual void updateIfDirty();
 
@@ -21,7 +29,8 @@ namespace CamelotEngine
 		
 		static GpuParamBlockPtr create(const GpuParamBlockDesc& desc);
 	protected:
-		bool mDirty;
+		GpuParamBlockSharedData* sharedData;
+		bool mOwnsSharedData;
 		UINT8* mData;
 		UINT32 mSize;
 	};

+ 34 - 0
CamelotRenderer/Include/CmRenderSystemCapabilities.h

@@ -256,6 +256,10 @@ namespace CamelotEngine
 		map<GpuProgramType, UINT16>::type mNumTextureUnitsPerStage;
 		/// Total number of texture units available
 		UINT16 mNumCombinedTextureUnits;
+		/// The number of uniform blocks available per stage
+		map<GpuProgramType, UINT16>::type mNumUniformBlocksPerStage;
+		/// Total number of uniform blocks available
+		UINT16 mNumCombinedUniformBlocks;
 		/// The stencil buffer bit depth
 		UINT16 mStencilBufferBitDepth;
 		/// The number of matrices available for hardware blending
@@ -381,6 +385,16 @@ namespace CamelotEngine
 			mNumCombinedTextureUnits = num;
 		}
 
+		void setNumUniformBlockBuffers(GpuProgramType type, UINT16 num)
+		{
+			mNumUniformBlocksPerStage[type] = num;
+		}
+
+		void setNumCombinedUniformBlockBuffers(UINT16 num)
+		{
+			mNumCombinedUniformBlocks = num;
+		}
+
 		void setStencilBufferBitDepth(UINT16 num)
 		{
 			mStencilBufferBitDepth = num;
@@ -422,6 +436,26 @@ namespace CamelotEngine
 			return mNumCombinedTextureUnits;
 		}
 
+		/** Returns the number of uniform buffer blocks the current output hardware
+		supports, for the specified stage.
+		*/
+		UINT16 getNumUniformBlockBuffers(GpuProgramType type) const
+		{
+			auto iterFind = mNumUniformBlocksPerStage.find(type);
+			if(iterFind != mNumUniformBlocksPerStage.end())
+				return iterFind->second;
+			else
+				return 0;
+		}
+
+		/** Returns the number of combined uniform buffers the current output hardware
+		supports, total for all stages combined.
+		*/
+		UINT16 getNumCombinedUniformBlockBuffers() const
+		{
+			return mNumCombinedUniformBlocks;
+		}
+
 		/** Determines the bit depth of the hardware accelerated stencil 
 		buffer, if supported.
 		@remarks

+ 13 - 4
CamelotRenderer/Source/CmGpuParamBlock.cpp

@@ -6,15 +6,22 @@
 namespace CamelotEngine
 {
 	GpuParamBlock::GpuParamBlock(const GpuParamBlockDesc& desc)
-		:mSize(desc.blockSize * sizeof(UINT32)), mDirty(true)
+		:mSize(desc.blockSize * sizeof(UINT32)), mOwnsSharedData(true)
 	{
 		mData = new UINT8[desc.blockSize];
 		memset(mData, 0, desc.blockSize);
+
+		sharedData = new GpuParamBlockSharedData();
+		sharedData->mDirty = true;
+		sharedData->mInitialized = false;
 	}
 
 	GpuParamBlock::~GpuParamBlock()
 	{
 		delete [] mData;
+
+		if(mOwnsSharedData)
+			delete sharedData;
 	}
 
 	void GpuParamBlock::write(UINT32 offset, const void* data, UINT32 size)
@@ -30,7 +37,7 @@ namespace CamelotEngine
 
 		memcpy(mData + offset, data, size);
 
-		mDirty = true;
+		sharedData->mDirty = true;
 	}
 
 	void GpuParamBlock::zeroOut(UINT32 offset, UINT32 size)
@@ -46,7 +53,7 @@ namespace CamelotEngine
 
 		memset(mData + offset, 0, size);
 
-		mDirty = true;
+		sharedData->mDirty = true;
 	}
 
 	const UINT8* GpuParamBlock::getDataPtr(UINT32 offset) const
@@ -65,7 +72,7 @@ namespace CamelotEngine
 
 	void GpuParamBlock::updateIfDirty()
 	{
-		mDirty = false;
+		sharedData->mDirty = false;
 
 		// Do nothing
 	}
@@ -74,6 +81,8 @@ namespace CamelotEngine
 	{
 		GpuParamBlockPtr clonedParamBlock(new GpuParamBlock(*this));
 		clonedParamBlock->mData = new UINT8[mSize];
+		clonedParamBlock->mSize = mSize;
+		clonedParamBlock->mOwnsSharedData = false;
 		memcpy(clonedParamBlock->mData, mData, mSize);
 
 		return clonedParamBlock;

+ 8 - 0
CamelotRenderer/Source/CmRenderSystemCapabilities.cpp

@@ -36,6 +36,7 @@ namespace CamelotEngine {
 		: mVendor(GPU_UNKNOWN)
 		, mNumWorldMatrices(0)
 		, mNumCombinedTextureUnits(0)
+		, mNumCombinedUniformBlocks(0)
 		, mStencilBufferBitDepth(0)
 		, mNumVertexBlendMatrices(0)
 		, mNumMultiRenderTargets(1)
@@ -58,6 +59,13 @@ namespace CamelotEngine {
 		mNumTextureUnitsPerStage[GPT_HULL_PROGRAM] = 0;
 		mNumTextureUnitsPerStage[GPT_DOMAIN_PROGRAM] = 0;
 		mNumTextureUnitsPerStage[GPT_COMPUTE_PROGRAM] = 0;
+
+		mNumUniformBlocksPerStage[GPT_VERTEX_PROGRAM] = 0;
+		mNumUniformBlocksPerStage[GPT_FRAGMENT_PROGRAM] = 0;
+		mNumUniformBlocksPerStage[GPT_GEOMETRY_PROGRAM] = 0;
+		mNumUniformBlocksPerStage[GPT_HULL_PROGRAM] = 0;
+		mNumUniformBlocksPerStage[GPT_DOMAIN_PROGRAM] = 0;
+		mNumUniformBlocksPerStage[GPT_COMPUTE_PROGRAM] = 0;
 	}
 	//-----------------------------------------------------------------------
 	RenderSystemCapabilities::~RenderSystemCapabilities()

+ 2 - 11
CamelotRenderer/TODO.txt

@@ -19,18 +19,9 @@
 /////
 -----------GpuProgramParameters/Pass/Material REFACTOR------------------------------
 
-Finish up GL port:
- - Params aren't being initialized anywhere
-
-Set up pipeline
-Assign vertex attributes
- - When rendering, compare mesh vertec decl and shader decl and use only overlapping attribs
-Assign uniform buffers
-
-
-
-Get through capabilities and remove bool/int/float constant counters and instead add uniform, uniform block, input/output component counters
+Destroying and constructing GpuParamBlock happens on main thread, which is wrong (especially destroying it)
 
+Uniform buffers for GL are all set up but don't actually work :/
 Saving/loading of material params isn't completed (in MaterialRTTI)
 Port DX11 to new shader param system
 Ability to switch out GpuParamBlocks (i.e. share them between programs)