Prechádzať zdrojové kódy

OpenGL updated so it uses the new combined GPU parameter design

BearishSun 9 rokov pred
rodič
commit
8ab4660f21

+ 2 - 59
Source/BansheeGLRenderAPI/Include/BsGLRenderAPI.h

@@ -35,20 +35,8 @@ namespace BansheeEngine
 		void setComputePipeline(const SPtr<GpuProgramCore>& computeProgram,
 			const SPtr<CommandBuffer>& commandBuffer = nullptr) override;
 
-		/** @copydoc RenderAPICore::setSamplerState() */
-		void setSamplerState(GpuProgramType gptype, UINT16 texUnit, const SPtr<SamplerStateCore>& samplerState, 
-			const SPtr<CommandBuffer>& commandBuffer = nullptr) override;
-
-		/** @copydoc RenderAPICore::setTexture() */
-		void setTexture(GpuProgramType gptype, UINT16 texUnit, const SPtr<TextureCore>& texture,
-			const SPtr<CommandBuffer>& commandBuffer = nullptr) override;
-
-		/** @copydoc RenderAPICore::setLoadStoreTexture */
-		void setLoadStoreTexture(GpuProgramType gptype, UINT16 texUnit, const SPtr<TextureCore>& texture,
-			const TextureSurface& surface, const SPtr<CommandBuffer>& commandBuffer = nullptr) override;
-
-		/** @copydoc RenderAPICore::setBuffer */
-		void setBuffer(GpuProgramType gptype, UINT16 unit, const SPtr<GpuBufferCore>& buffer, bool loadStore = false,
+		/** @copydoc RenderAPICore::setGpuParams() */
+		void setGpuParams(const SPtr<GpuParamsCore>& gpuParams,
 			const SPtr<CommandBuffer>& commandBuffer = nullptr) override;
 
 		/** @copydoc RenderAPICore::beginFrame() */
@@ -75,10 +63,6 @@ namespace BansheeEngine
 		void setIndexBuffer(const SPtr<IndexBufferCore>& buffer,
 			const SPtr<CommandBuffer>& commandBuffer = nullptr) override;
 
-		/** @copydoc RenderAPICore::setParamBuffer */
-		void setParamBuffer(GpuProgramType gptype, UINT32 slot, const SPtr<GpuParamBlockBufferCore>& buffer,
-			const SPtr<GpuParamDesc>& paramDesc, const SPtr<CommandBuffer>& commandBuffer = nullptr) override;
-
 		/** @copydoc RenderAPICore::setVertexDeclaration() */
 		void setVertexDeclaration(const SPtr<VertexDeclarationCore>& vertexDeclaration,
 			const SPtr<CommandBuffer>& commandBuffer = nullptr) override;
@@ -179,32 +163,6 @@ namespace BansheeEngine
 		/** Gets a combined min/mip filter value usable by OpenGL from the currently set min and mip filters. */
 		GLuint getCombinedMinMipFilter() const;
 
-		/**
-		 * Calculates a global texture unit slot for a sampler specific to a GPU program. 
-		 * 
-		 * @param[in]	gptype		Type of the GPU program the sampler is a part of.
-		 * @param[in]	samplerIdx	Index of the sampler uniform.
-		 * @return					Unique global texture unit index that can be used for binding a texture to the specified
-		 *							sampler.
-		 */
-		UINT32 getGLTextureUnit(GpuProgramType gptype, UINT32 samplerIdx);
-
-		/**
-		 * Calculates a global image unit slot based on a uniform index of the image in a GPU program. 
-		 * 
-		 * @param[in]	gptype		Type of the GPU program the uniform is a part of.
-		 * @param[in]	uniformIdx	Index of the image uniform.
-		 * @return					Unique global image unit index that can be used for binding a load-store texture to the
-		 *							specified uniform.
-		 */
-		UINT32 getGLImageUnit(GpuProgramType gptype, UINT32 uniformIdx);
-
-		/**
-		 * OpenGL shares all buffer bindings, but the engine prefers to keep buffers separate per-stage. This will convert
-		 * block buffer binding that is set per stage into a global block buffer binding usable by OpenGL.
-		 */
-		UINT32 getGLUniformBlockBinding(GpuProgramType gptype, UINT32 binding);
-
 		/** Returns the OpenGL specific mode used for drawing, depending on the currently set draw operation. */
 		GLint getGLDrawMode() const;
 
@@ -410,16 +368,9 @@ namespace BansheeEngine
 		/** Information about a currently bound texture. */
 		struct TextureInfo
 		{
-			UINT32 samplerIdx;
 			GLenum type;
 		};
 
-		/** Information about a currently bound load-store texture (image in OpenGL lingo). */
-		struct ImageInfo
-		{
-			UINT32 uniformIdx;
-		};
-
 		static const UINT32 MAX_VB_COUNT = 32;
 
 		Rect2 mViewportNorm;
@@ -439,9 +390,6 @@ namespace BansheeEngine
 		// Holds texture type settings for every stage
 		UINT32 mNumTextureUnits;
 		TextureInfo* mTextureInfos;
-		UINT32 mNumImageUnits;
-		ImageInfo* mImageInfos;
-
 		bool mDepthWrite;
 		bool mColorWrite[4];
 
@@ -460,11 +408,6 @@ namespace BansheeEngine
 
 		const GLSLProgramPipeline* mActivePipeline;
 
-		UINT32 mTextureUnitOffsets[6];
-		UINT32 mMaxBoundTexUnits[6];
-		UINT32 mMaxBoundImageUnits[6];
-		UINT32 mUBOffsets[6];
-
 		std::array<SPtr<VertexBufferCore>, MAX_VB_COUNT> mBoundVertexBuffers;
 		SPtr<VertexDeclarationCore> mBoundVertexDeclaration;
 		SPtr<IndexBufferCore> mBoundIndexBuffer;

+ 305 - 453
Source/BansheeGLRenderAPI/Source/BsGLRenderAPI.cpp

@@ -49,16 +49,10 @@ namespace BansheeEngine
 		, mStencilCompareBack(CMPF_ALWAYS_PASS)
 		, mNumTextureUnits(0)
 		, mTextureInfos(nullptr)
-		, mNumImageUnits(0)
-		, mImageInfos(nullptr)
 		, mDepthWrite(true)
 		, mGLSLProgramFactory(nullptr)
 		, mProgramPipelineManager(nullptr)
 		, mActivePipeline(nullptr)
-		, mTextureUnitOffsets {}
-		, mMaxBoundTexUnits {}
-		, mMaxBoundImageUnits {}
-		, mUBOffsets {}
 		, mCurrentDrawOperation(DOT_TRIANGLE_LIST)
 		, mDrawCallInProgress(false)
 		, mActiveTextureUnit(0)
@@ -206,11 +200,8 @@ namespace BansheeEngine
 		if(mGLSupport)
 			bs_delete(mGLSupport);
 
-		if(mTextureInfos != nullptr)
+		if (mTextureInfos != nullptr)
 			bs_deleteN(mTextureInfos, mNumTextureUnits);
-
-		if (mImageInfos != nullptr)
-			bs_deleteN(mImageInfos, mNumImageUnits);
 	}
 
 	void GLRenderAPI::setGraphicsPipeline(const SPtr<GpuPipelineStateCore>& pipelineState,
@@ -257,16 +248,6 @@ namespace BansheeEngine
 				depthStencilState = DepthStencilStateCore::getDefault().get();
 			}
 
-			int types[] = { GPT_VERTEX_PROGRAM, GPT_FRAGMENT_PROGRAM, GPT_GEOMETRY_PROGRAM, 
-				GPT_HULL_PROGRAM, GPT_DOMAIN_PROGRAM };
-			
-			UINT32 numTypes = sizeof(types) / sizeof(types[0]);
-			for (UINT32 i = 0; i < numTypes; i++)
-			{
-				mMaxBoundTexUnits[types[i]] = mTextureUnitOffsets[types[i]];
-				mMaxBoundImageUnits[types[i]] = 0;
-			}
-
 			// Blend state
 			{
 				const BlendProperties& stateProps = blendState->getProperties();
@@ -350,9 +331,6 @@ namespace BansheeEngine
 				mCurrentComputeProgram = std::static_pointer_cast<GLSLGpuProgramCore>(computeProgram);
 			else
 				mCurrentComputeProgram = nullptr;
-
-			mMaxBoundTexUnits[GPT_COMPUTE_PROGRAM] = mTextureUnitOffsets[GPT_COMPUTE_PROGRAM];
-			mMaxBoundImageUnits[GPT_COMPUTE_PROGRAM] = 0;
 		};
 
 		if (commandBuffer == nullptr)
@@ -368,368 +346,360 @@ namespace BansheeEngine
 		BS_INC_RENDER_STAT(NumPipelineStateChanges);
 	}
 
-	void GLRenderAPI::setParamBuffer(GpuProgramType gptype, UINT32 slot, const SPtr<GpuParamBlockBufferCore>& buffer, 
-		const SPtr<GpuParamDesc>& paramDesc, const SPtr<CommandBuffer>& commandBuffer)
+	void GLRenderAPI::setGpuParams(const SPtr<GpuParamsCore>& gpuParams, const SPtr<CommandBuffer>& commandBuffer)
 	{
-		auto executeRef = [&](GpuProgramType gptype, UINT32 slot, const SPtr<GpuParamBlockBufferCore>& buffer,
-			const SPtr<GpuParamDesc>& paramDesc)
+		auto executeRef = [&](const SPtr<GpuParamsCore>& gpuParams)
 		{
 			THROW_IF_NOT_CORE_THREAD;
 
-			if (buffer == nullptr)
-				return;
-
-			SPtr<GLSLGpuProgramCore> activeProgram = getActiveProgram(gptype);
-			GLuint glProgram = activeProgram->getGLHandle();
+			for(UINT32 i = 0; i < 8; i++)
+				glBindImageTexture(i, 0, 0, false, 0, GL_READ_WRITE, GL_R32F);
 
-			// 0 means uniforms are not in block, in which case we handle it specially
-			if (slot == 0)
+			bs_frame_mark();
 			{
-				UINT8* uniformBufferData = (UINT8*)bs_stack_alloc(buffer->getSize());
-				buffer->read(0, uniformBufferData, buffer->getSize());
-
-				for (auto iter = paramDesc->params.begin(); iter != paramDesc->params.end(); ++iter)
+				UINT32 textureUnitCount = 0;
+				FrameVector<UINT32> textureUnits(12);
+				/**
+				 * Maps a texture/sampler binding of the stage currently being processed to a global GL texture unit.
+				 * The first lookup of a binding allocates the next free unit; later lookups of the same binding within
+				 * the same stage return that same unit (so a sampler ends up on the unit of its texture).
+				 *
+				 * Relies on textureUnits being cleared at the start of every stage while textureUnitCount keeps
+				 * growing across stages.
+				 */
+				auto getTexUnit = [&](UINT32 binding)
 				{
-					const GpuParamDataDesc& param = iter->second;
-
-					if (param.paramBlockSlot != 0) // 0 means uniforms are not in a block
-						continue;
-
-					const UINT8* ptrData = uniformBufferData + param.cpuMemOffset * sizeof(UINT32);
+					UINT32 unit;
 
-					// Note: We don't transpose matrices here even though we don't use column major format
-					// because they are assumed to be pre-transposed in the GpuParams buffer
-					switch (param.type)
+					auto iterFind = std::find(textureUnits.begin(), textureUnits.end(), binding);
+					if (iterFind != textureUnits.end())
+					{
+						// The vector stores bindings, not units. Units handed out for this stage are the last
+						// textureUnits.size() values below textureUnitCount, in insertion order.
+						UINT32 stageFirstUnit = textureUnitCount - (UINT32)textureUnits.size();
+						unit = stageFirstUnit + (UINT32)(iterFind - textureUnits.begin());
+					}
+					else
 					{
-					case GPDT_FLOAT1:
-						glProgramUniform1fv(glProgram, param.gpuMemOffset, param.arraySize, (GLfloat*)ptrData);
-						break;
-					case GPDT_FLOAT2:
-						glProgramUniform2fv(glProgram, param.gpuMemOffset, param.arraySize, (GLfloat*)ptrData);
-						break;
-					case GPDT_FLOAT3:
-						glProgramUniform3fv(glProgram, param.gpuMemOffset, param.arraySize, (GLfloat*)ptrData);
-						break;
-					case GPDT_FLOAT4:
-						glProgramUniform4fv(glProgram, param.gpuMemOffset, param.arraySize, (GLfloat*)ptrData);
-						break;
-					case GPDT_MATRIX_2X2:
-						glProgramUniformMatrix2fv(glProgram, param.gpuMemOffset, param.arraySize,
-							GL_FALSE, (GLfloat*)ptrData);
-						break;
-					case GPDT_MATRIX_2X3:
-						glProgramUniformMatrix3x2fv(glProgram, param.gpuMemOffset, param.arraySize,
-							GL_FALSE, (GLfloat*)ptrData);
-						break;
-					case GPDT_MATRIX_2X4:
-						glProgramUniformMatrix4x2fv(glProgram, param.gpuMemOffset, param.arraySize,
-							GL_FALSE, (GLfloat*)ptrData);
-						break;
-					case GPDT_MATRIX_3X2:
-						glProgramUniformMatrix2x3fv(glProgram, param.gpuMemOffset, param.arraySize,
-							GL_FALSE, (GLfloat*)ptrData);
-						break;
-					case GPDT_MATRIX_3X3:
-						glProgramUniformMatrix3fv(glProgram, param.gpuMemOffset, param.arraySize,
-							GL_FALSE, (GLfloat*)ptrData);
-						break;
-					case GPDT_MATRIX_3X4:
-						glProgramUniformMatrix4x3fv(glProgram, param.gpuMemOffset, param.arraySize,
-							GL_FALSE, (GLfloat*)ptrData);
-						break;
-					case GPDT_MATRIX_4X2:
-						glProgramUniformMatrix2x4fv(glProgram, param.gpuMemOffset, param.arraySize,
-							GL_FALSE, (GLfloat*)ptrData);
-						break;
-					case GPDT_MATRIX_4X3:
-						glProgramUniformMatrix3x4fv(glProgram, param.gpuMemOffset, param.arraySize,
-							GL_FALSE, (GLfloat*)ptrData);
-						break;
-					case GPDT_MATRIX_4X4:
-						glProgramUniformMatrix4fv(glProgram, param.gpuMemOffset, param.arraySize,
-							GL_FALSE, (GLfloat*)ptrData);
-						break;
-					case GPDT_INT1:
-						glProgramUniform1iv(glProgram, param.gpuMemOffset, param.arraySize, (GLint*)ptrData);
-						break;
-					case GPDT_INT2:
-						glProgramUniform2iv(glProgram, param.gpuMemOffset, param.arraySize, (GLint*)ptrData);
-						break;
-					case GPDT_INT3:
-						glProgramUniform3iv(glProgram, param.gpuMemOffset, param.arraySize, (GLint*)ptrData);
-						break;
-					case GPDT_INT4:
-						glProgramUniform4iv(glProgram, param.gpuMemOffset, param.arraySize, (GLint*)ptrData);
-						break;
-					case GPDT_BOOL:
-						glProgramUniform1uiv(glProgram, param.gpuMemOffset, param.arraySize, (GLuint*)ptrData);
-						break;
-					default:
-					case GPDT_UNKNOWN:
-						break;
+						unit = textureUnitCount++;
+						textureUnits.push_back(binding);
 					}
-				}
 
-				if (uniformBufferData != nullptr)
-				{
-					bs_stack_free(uniformBufferData);
-				}
-			}
-			else
-			{
-				const GLGpuParamBlockBufferCore* glParamBlockBuffer = static_cast<const GLGpuParamBlockBufferCore*>(buffer.get());
-
-				UINT32 globalBlockBinding = getGLUniformBlockBinding(gptype, slot - 1);
-				glUniformBlockBinding(glProgram, slot - 1, globalBlockBinding);
-				glBindBufferRange(GL_UNIFORM_BUFFER, globalBlockBinding, glParamBlockBuffer->getGLHandle(), 0,
-					glParamBlockBuffer->getSize());
-			}
-		};
-
-		if (commandBuffer == nullptr)
-			executeRef(gptype, slot, buffer, paramDesc);
-		else
-		{
-			auto execute = [=]() { executeRef(gptype, slot, buffer, paramDesc); };
+					return unit;
+				};
 
-			SPtr<GLCommandBuffer> cb = std::static_pointer_cast<GLCommandBuffer>(commandBuffer);
-			cb->queueCommand(execute);
-		}
-
-		BS_INC_RENDER_STAT(NumGpuParamBufferBinds);
-	}
-
-	void GLRenderAPI::setTexture(GpuProgramType gptype, UINT16 unit, const SPtr<TextureCore>& texPtr,
-		const SPtr<CommandBuffer>& commandBuffer)
-	{
-		auto executeRef = [&](GpuProgramType gptype, UINT16 unit, const SPtr<TextureCore>& texPtr)
-		{
-			THROW_IF_NOT_CORE_THREAD;
-
-			UINT32 texUnit = getGLTextureUnit(gptype, unit);
-			if (!activateGLTextureUnit(texUnit))
-				return;
-
-			SPtr<GLTextureCore> tex = std::static_pointer_cast<GLTextureCore>(texPtr);
-			if (tex != nullptr)
-			{
-				GLenum newTextureType = tex->getGLTextureTarget();
-
-				if (mTextureInfos[texUnit].type != newTextureType)
-					glBindTexture(mTextureInfos[texUnit].type, 0);
-
-				mTextureInfos[texUnit].type = newTextureType;
-				mTextureInfos[texUnit].samplerIdx = unit;
-
-				mMaxBoundTexUnits[gptype] = std::max(mMaxBoundTexUnits[gptype], texUnit + 1);
-
-				glBindTexture(newTextureType, tex->getGLID());
-
-				SPtr<GLSLGpuProgramCore> activeProgram = getActiveProgram(gptype);
-				if (activeProgram != nullptr)
+				UINT32 imageUnitCount = 0;
+				FrameVector<UINT32> imageUnits(6);
+				/**
+				 * Maps a load-store texture/buffer binding of the stage currently being processed to a global GL image
+				 * unit. The first lookup of a binding allocates the next free unit; later lookups of the same binding
+				 * within the same stage return that same unit.
+				 *
+				 * Relies on imageUnits being cleared at the start of every stage while imageUnitCount keeps growing
+				 * across stages.
+				 */
+				auto getImageUnit = [&](UINT32 binding)
 				{
-					GLuint glProgram = activeProgram->getGLHandle();
-
-					glProgramUniform1i(glProgram, unit, texUnit);
-				}
-			}
-			else
-			{
-				glBindTexture(mTextureInfos[texUnit].type, 0);
-				mTextureInfos[texUnit].samplerIdx = (UINT32)-1;
-			}
-
-			activateGLTextureUnit(0);
-		};
-
-		if (commandBuffer == nullptr)
-			executeRef(gptype, unit, texPtr);
-		else
-		{
-			auto execute = [=]() { executeRef(gptype, unit, texPtr); };
-
-			SPtr<GLCommandBuffer> cb = std::static_pointer_cast<GLCommandBuffer>(commandBuffer);
-			cb->queueCommand(execute);
-		}
-
-		BS_INC_RENDER_STAT(NumTextureBinds);
-	}
-
-	void GLRenderAPI::setSamplerState(GpuProgramType gptype, UINT16 unit, const SPtr<SamplerStateCore>& state,
-		const SPtr<CommandBuffer>& commandBuffer)
-	{
-		auto executeRef = [&](GpuProgramType gptype, UINT16 unit, const SPtr<SamplerStateCore>& state)
-		{
-			THROW_IF_NOT_CORE_THREAD;
-
-			const SamplerProperties& stateProps = state->getProperties();
+					UINT32 unit;
 
-			UINT16 texUnit = getGLTextureUnit(gptype, unit);
+					auto iterFind = std::find(imageUnits.begin(), imageUnits.end(), binding);
+					if (iterFind != imageUnits.end())
+					{
+						// The vector stores bindings, not units. Units handed out for this stage are the last
+						// imageUnits.size() values below imageUnitCount, in insertion order.
+						UINT32 stageFirstUnit = imageUnitCount - (UINT32)imageUnits.size();
+						unit = stageFirstUnit + (UINT32)(iterFind - imageUnits.begin());
+					}
+					else
+					{
+						unit = imageUnitCount++;
+						imageUnits.push_back(binding);
+					}
 
-			// Set texture layer filtering
-			setTextureFiltering(texUnit, FT_MIN, stateProps.getTextureFiltering(FT_MIN));
-			setTextureFiltering(texUnit, FT_MAG, stateProps.getTextureFiltering(FT_MAG));
-			setTextureFiltering(texUnit, FT_MIP, stateProps.getTextureFiltering(FT_MIP));
+					return unit;
+				};
 
-			// Set texture anisotropy
-			setTextureAnisotropy(texUnit, stateProps.getTextureAnisotropy());
+				UINT32 uniformUnitCount = 0;
+				FrameVector<UINT32> uniformUnits(6);
+				/**
+				 * Maps a uniform block binding of the stage currently being processed to a global GL uniform buffer
+				 * binding point. The first lookup of a binding allocates the next free binding point; later lookups of
+				 * the same binding within the same stage return that same binding point.
+				 *
+				 * Relies on uniformUnits being cleared at the start of every stage while uniformUnitCount keeps
+				 * growing across stages.
+				 */
+				auto getUniformUnit = [&](UINT32 binding)
+				{
+					UINT32 unit;
 
-			// Set mipmap biasing
-			setTextureMipmapBias(texUnit, stateProps.getTextureMipmapBias());
+					auto iterFind = std::find(uniformUnits.begin(), uniformUnits.end(), binding);
+					if (iterFind != uniformUnits.end())
+					{
+						// The vector stores bindings, not units. Units handed out for this stage are the last
+						// uniformUnits.size() values below uniformUnitCount, in insertion order.
+						UINT32 stageFirstUnit = uniformUnitCount - (UINT32)uniformUnits.size();
+						unit = stageFirstUnit + (UINT32)(iterFind - uniformUnits.begin());
+					}
+					else
+					{
+						unit = uniformUnitCount++;
+						uniformUnits.push_back(binding);
+					}
 
-			// Texture addressing mode
-			const UVWAddressingMode& uvw = stateProps.getTextureAddressingMode();
-			setTextureAddressingMode(texUnit, uvw);
+					return unit;
+				};
 
-			// Set border color
-			setTextureBorderColor(texUnit, stateProps.getBorderColor());
+				const UINT32 numStages = 6;
+				for(UINT32 i = 0; i < numStages; i++)
+				{
+					textureUnits.clear();
+					imageUnits.clear();
+					uniformUnits.clear();
 
-			mTextureInfos[texUnit].samplerIdx = unit;
-			mMaxBoundTexUnits[gptype] = std::max(mMaxBoundTexUnits[gptype], (UINT32)texUnit + 1);
-		};
+					GpuProgramType type = (GpuProgramType)i;
 
-		if (commandBuffer == nullptr)
-			executeRef(gptype, unit, state);
-		else
-		{
-			auto execute = [=]() { executeRef(gptype, unit, state); };
+					SPtr<GpuParamDesc> paramDesc = gpuParams->getParamDesc(type);
+					if (paramDesc == nullptr)
+						continue;
 
-			SPtr<GLCommandBuffer> cb = std::static_pointer_cast<GLCommandBuffer>(commandBuffer);
-			cb->queueCommand(execute);
-		}
+					for (auto& entry : paramDesc->textures)
+					{
+						UINT32 binding = entry.second.slot;
+						SPtr<TextureCore> texture = gpuParams->getTexture(entry.second.set, binding);
 
-		BS_INC_RENDER_STAT(NumSamplerBinds);
-	}
+						UINT32 unit = getTexUnit(binding);
+						if (!activateGLTextureUnit(unit))
+							continue;
 
-	void GLRenderAPI::setLoadStoreTexture(GpuProgramType gptype, UINT16 unit, const SPtr<TextureCore>& texPtr,
-		const TextureSurface& surface, const SPtr<CommandBuffer>& commandBuffer)
-	{
-		auto executeRef = [&](GpuProgramType gptype, UINT16 unit, const SPtr<TextureCore>& texPtr,
-			const TextureSurface& surface)
-		{
-			THROW_IF_NOT_CORE_THREAD;
+						GLTextureCore* glTex = static_cast<GLTextureCore*>(texture.get());
 
-			// TODO - OpenGL can't bind a certain subset of faces like DX11, only zero, one or all, so I'm ignoring numSlices parameter
+						if (glTex != nullptr)
+						{
+							GLenum newTextureType = glTex->getGLTextureTarget();
 
-			UINT32 imageUnit = getGLImageUnit(gptype, unit);
-			if (texPtr != nullptr)
-			{
-				SPtr<GLTextureCore> tex = std::static_pointer_cast<GLTextureCore>(texPtr);
-				glBindImageTexture(imageUnit, tex->getGLID(), surface.mipLevel, surface.numArraySlices > 1,
-					surface.arraySlice, GL_READ_WRITE, tex->getGLFormat());
+							if (mTextureInfos[unit].type != newTextureType)
+								glBindTexture(mTextureInfos[unit].type, 0);
 
-				mImageInfos[imageUnit].uniformIdx = unit;
-				mMaxBoundImageUnits[gptype] = std::max(mMaxBoundImageUnits[gptype], imageUnit + 1);
+							glBindTexture(newTextureType, glTex->getGLID());
+							mTextureInfos[unit].type = newTextureType;
 
-				SPtr<GLSLGpuProgramCore> activeProgram = getActiveProgram(gptype);
-				if (activeProgram != nullptr)
-				{
-					GLuint glProgram = activeProgram->getGLHandle();
+							SPtr<GLSLGpuProgramCore> activeProgram = getActiveProgram(type);
+							if (activeProgram != nullptr)
+							{
+								GLuint glProgram = activeProgram->getGLHandle();
 
-					glProgramUniform1i(glProgram, unit, imageUnit);
-				}
-			}
-			else
-			{
-				mImageInfos[imageUnit].uniformIdx = (UINT32)-1;
-				glBindImageTexture(imageUnit, 0, 0, false, 0, GL_READ_WRITE, GL_R32F);
-			}
-		};
-
-		if (commandBuffer == nullptr)
-			executeRef(gptype, unit, texPtr, surface);
-		else
-		{
-			auto execute = [=]() { executeRef(gptype, unit, texPtr, surface); };
+								glProgramUniform1i(glProgram, binding, unit);
+							}
+						}
+						else
+							glBindTexture(mTextureInfos[unit].type, 0);
+					}
 
-			SPtr<GLCommandBuffer> cb = std::static_pointer_cast<GLCommandBuffer>(commandBuffer);
-			cb->queueCommand(execute);
-		}
+					for(auto& entry : paramDesc->samplers)
+					{
+						UINT32 binding = entry.second.slot;
+						SPtr<SamplerStateCore> samplerState = gpuParams->getSamplerState(entry.second.set, binding);
 
-		BS_INC_RENDER_STAT(NumTextureBinds);
-	}
+						if (samplerState == nullptr)
+							samplerState = SamplerStateCore::getDefault();
 
-	/** @copydoc RenderAPICore::setBuffer */
-	void GLRenderAPI::setBuffer(GpuProgramType gptype, UINT16 unit, const SPtr<GpuBufferCore>& buffer, bool loadStore,
-		const SPtr<CommandBuffer>& commandBuffer)
-	{
-		auto executeRef = [&](GpuProgramType gptype, UINT16 unit, const SPtr<GpuBufferCore>& buffer, bool loadStore)
-		{
-			THROW_IF_NOT_CORE_THREAD;
+						UINT32 unit = getTexUnit(binding);
+						if (!activateGLTextureUnit(unit))
+							continue;
 
-			SPtr<GLGpuBufferCore> glBuffer = std::static_pointer_cast<GLGpuBufferCore>(buffer);
-			if (!loadStore)
-			{
-				UINT32 texUnit = getGLTextureUnit(gptype, unit);
-				if (!activateGLTextureUnit(texUnit))
-					return;
+						const SamplerProperties& stateProps = samplerState->getProperties();
 
-				if (glBuffer != nullptr)
-				{
-					if (mTextureInfos[texUnit].type != GL_TEXTURE_BUFFER)
-						glBindTexture(mTextureInfos[texUnit].type, 0);
+						setTextureFiltering(unit, FT_MIN, stateProps.getTextureFiltering(FT_MIN));
+						setTextureFiltering(unit, FT_MAG, stateProps.getTextureFiltering(FT_MAG));
+						setTextureFiltering(unit, FT_MIP, stateProps.getTextureFiltering(FT_MIP));
 
-					mTextureInfos[texUnit].type = GL_TEXTURE_BUFFER;
-					mTextureInfos[texUnit].samplerIdx = unit;
+						setTextureAnisotropy(unit, stateProps.getTextureAnisotropy());
+						setTextureMipmapBias(unit, stateProps.getTextureMipmapBias());
 
-					mMaxBoundTexUnits[gptype] = std::max(mMaxBoundTexUnits[gptype], texUnit + 1);
+						const UVWAddressingMode& uvw = stateProps.getTextureAddressingMode();
+						setTextureAddressingMode(unit, uvw);
 
-					glBindTexture(GL_TEXTURE_BUFFER, glBuffer->getGLTextureId());
+						setTextureBorderColor(unit, stateProps.getBorderColor());
+					}
 
-					SPtr<GLSLGpuProgramCore> activeProgram = getActiveProgram(gptype);
-					if (activeProgram != nullptr)
+					for(auto& entry : paramDesc->buffers)
 					{
-						GLuint glProgram = activeProgram->getGLHandle();
+						UINT32 binding = entry.second.slot;
+						SPtr<GpuBufferCore> buffer = gpuParams->getBuffer(entry.second.set, binding);
+
+						bool isLoadStore = entry.second.type != GPOT_BYTE_BUFFER &&
+							entry.second.type != GPOT_STRUCTURED_BUFFER;
+
+						GLGpuBufferCore* glBuffer = static_cast<GLGpuBufferCore*>(buffer.get());
+						if (!isLoadStore)
+						{
+							UINT32 unit = getTexUnit(binding);
+							if (!activateGLTextureUnit(unit))
+								continue;
+
+							if (glBuffer != nullptr)
+							{
+								if (mTextureInfos[unit].type != GL_TEXTURE_BUFFER)
+									glBindTexture(mTextureInfos[unit].type, 0);
+
+								mTextureInfos[unit].type = GL_TEXTURE_BUFFER;
+
+								glBindTexture(GL_TEXTURE_BUFFER, glBuffer->getGLTextureId());
+
+								SPtr<GLSLGpuProgramCore> activeProgram = getActiveProgram(type);
+								if (activeProgram != nullptr)
+								{
+									GLuint glProgram = activeProgram->getGLHandle();
+
+									glProgramUniform1i(glProgram, binding, unit);
+								}
+							}
+							else
+								glBindTexture(mTextureInfos[unit].type, 0);
+						}
+						else
+						{
+							UINT32 unit = getImageUnit(binding);
+							if (glBuffer != nullptr)
+							{
+								glBindImageTexture(unit, glBuffer->getGLTextureId(), 0, false,
+									0, GL_READ_WRITE, glBuffer->getGLFormat());
+
+								SPtr<GLSLGpuProgramCore> activeProgram = getActiveProgram(type);
+								if (activeProgram != nullptr)
+								{
+									GLuint glProgram = activeProgram->getGLHandle();
+
+									glProgramUniform1i(glProgram, binding, unit);
+								}
+							}
+							else
+								glBindImageTexture(unit, 0, 0, false, 0, GL_READ_WRITE, GL_R32F);
+						}
+					}
 
-						glProgramUniform1i(glProgram, unit, texUnit);
+					for(auto& entry : paramDesc->loadStoreTextures)
+					{
+						UINT32 binding = entry.second.slot;
+
+						SPtr<TextureCore> texture = gpuParams->getLoadStoreTexture(entry.second.set, binding);
+						const TextureSurface& surface = gpuParams->getLoadStoreSurface(entry.second.set, binding);
+
+						UINT32 unit = getImageUnit(binding);
+						if (texture != nullptr)
+						{
+							GLTextureCore* tex = static_cast<GLTextureCore*>(texture.get());
+							glBindImageTexture(unit, tex->getGLID(), surface.mipLevel, surface.numArraySlices > 1,
+								surface.arraySlice, GL_READ_WRITE, tex->getGLFormat());
+
+							SPtr<GLSLGpuProgramCore> activeProgram = getActiveProgram(type);
+							if (activeProgram != nullptr)
+							{
+								GLuint glProgram = activeProgram->getGLHandle();
+
+								glProgramUniform1i(glProgram, binding, unit);
+							}
+						}
+						else
+							glBindImageTexture(unit, 0, 0, false, 0, GL_READ_WRITE, GL_R32F);
 					}
-				}
-				else
-				{
-					mTextureInfos[texUnit].samplerIdx = (UINT32)-1;
-					glBindTexture(mTextureInfos[texUnit].type, 0);
-				}
 
-				activateGLTextureUnit(0);
-			}
-			else
-			{
-				UINT32 imageUnit = getGLImageUnit(gptype, unit);
-				if (glBuffer != nullptr)
-				{
-					glBindImageTexture(imageUnit, glBuffer->getGLTextureId(), 0, false,
-						0, GL_READ_WRITE, glBuffer->getGLFormat());
+					for (auto& entry : paramDesc->paramBlocks)
+					{
+						UINT32 binding = entry.second.slot;
+						SPtr<GpuParamBlockBufferCore> buffer = gpuParams->getParamBlockBuffer(entry.second.set, binding);
+						
+						if (buffer == nullptr)
+							continue;
 
-					mImageInfos[imageUnit].uniformIdx = unit;
-					mMaxBoundImageUnits[gptype] = std::max(mMaxBoundImageUnits[gptype], imageUnit + 1);
+						buffer->flushToGPU();
 
-					SPtr<GLSLGpuProgramCore> activeProgram = getActiveProgram(gptype);
-					if (activeProgram != nullptr)
-					{
+						SPtr<GLSLGpuProgramCore> activeProgram = getActiveProgram(type);
 						GLuint glProgram = activeProgram->getGLHandle();
 
-						glProgramUniform1i(glProgram, unit, imageUnit);
+						// 0 means uniforms are not in block, in which case we handle it specially
+						if (binding == 0)
+						{
+							UINT8* uniformBufferData = (UINT8*)bs_stack_alloc(buffer->getSize());
+							buffer->read(0, uniformBufferData, buffer->getSize());
+
+							for (auto iter = paramDesc->params.begin(); iter != paramDesc->params.end(); ++iter)
+							{
+								const GpuParamDataDesc& param = iter->second;
+
+								if (param.paramBlockSlot != 0) // 0 means uniforms are not in a block
+									continue;
+
+								const UINT8* ptrData = uniformBufferData + param.cpuMemOffset * sizeof(UINT32);
+
+								// Note: We don't transpose matrices here even though we don't use column major format
+								// because they are assumed to be pre-transposed in the GpuParams buffer
+								switch (param.type)
+								{
+								case GPDT_FLOAT1:
+									glProgramUniform1fv(glProgram, param.gpuMemOffset, param.arraySize, (GLfloat*)ptrData);
+									break;
+								case GPDT_FLOAT2:
+									glProgramUniform2fv(glProgram, param.gpuMemOffset, param.arraySize, (GLfloat*)ptrData);
+									break;
+								case GPDT_FLOAT3:
+									glProgramUniform3fv(glProgram, param.gpuMemOffset, param.arraySize, (GLfloat*)ptrData);
+									break;
+								case GPDT_FLOAT4:
+									glProgramUniform4fv(glProgram, param.gpuMemOffset, param.arraySize, (GLfloat*)ptrData);
+									break;
+								case GPDT_MATRIX_2X2:
+									glProgramUniformMatrix2fv(glProgram, param.gpuMemOffset, param.arraySize,
+										GL_FALSE, (GLfloat*)ptrData);
+									break;
+								case GPDT_MATRIX_2X3:
+									glProgramUniformMatrix3x2fv(glProgram, param.gpuMemOffset, param.arraySize,
+										GL_FALSE, (GLfloat*)ptrData);
+									break;
+								case GPDT_MATRIX_2X4:
+									glProgramUniformMatrix4x2fv(glProgram, param.gpuMemOffset, param.arraySize,
+										GL_FALSE, (GLfloat*)ptrData);
+									break;
+								case GPDT_MATRIX_3X2:
+									glProgramUniformMatrix2x3fv(glProgram, param.gpuMemOffset, param.arraySize,
+										GL_FALSE, (GLfloat*)ptrData);
+									break;
+								case GPDT_MATRIX_3X3:
+									glProgramUniformMatrix3fv(glProgram, param.gpuMemOffset, param.arraySize,
+										GL_FALSE, (GLfloat*)ptrData);
+									break;
+								case GPDT_MATRIX_3X4:
+									glProgramUniformMatrix4x3fv(glProgram, param.gpuMemOffset, param.arraySize,
+										GL_FALSE, (GLfloat*)ptrData);
+									break;
+								case GPDT_MATRIX_4X2:
+									glProgramUniformMatrix2x4fv(glProgram, param.gpuMemOffset, param.arraySize,
+										GL_FALSE, (GLfloat*)ptrData);
+									break;
+								case GPDT_MATRIX_4X3:
+									glProgramUniformMatrix3x4fv(glProgram, param.gpuMemOffset, param.arraySize,
+										GL_FALSE, (GLfloat*)ptrData);
+									break;
+								case GPDT_MATRIX_4X4:
+									glProgramUniformMatrix4fv(glProgram, param.gpuMemOffset, param.arraySize,
+										GL_FALSE, (GLfloat*)ptrData);
+									break;
+								case GPDT_INT1:
+									glProgramUniform1iv(glProgram, param.gpuMemOffset, param.arraySize, (GLint*)ptrData);
+									break;
+								case GPDT_INT2:
+									glProgramUniform2iv(glProgram, param.gpuMemOffset, param.arraySize, (GLint*)ptrData);
+									break;
+								case GPDT_INT3:
+									glProgramUniform3iv(glProgram, param.gpuMemOffset, param.arraySize, (GLint*)ptrData);
+									break;
+								case GPDT_INT4:
+									glProgramUniform4iv(glProgram, param.gpuMemOffset, param.arraySize, (GLint*)ptrData);
+									break;
+								case GPDT_BOOL:
+									glProgramUniform1uiv(glProgram, param.gpuMemOffset, param.arraySize, (GLuint*)ptrData);
+									break;
+								default:
+								case GPDT_UNKNOWN:
+									break;
+								}
+							}
+
+							if (uniformBufferData != nullptr)
+							{
+								bs_stack_free(uniformBufferData);
+							}
+						}
+						else
+						{
+							const GLGpuParamBlockBufferCore* glParamBlockBuffer = static_cast<const GLGpuParamBlockBufferCore*>(buffer.get());
+
+							UINT32 unit = getUniformUnit(binding - 1);
+							glUniformBlockBinding(glProgram, binding - 1, unit);
+							glBindBufferRange(GL_UNIFORM_BUFFER, unit, glParamBlockBuffer->getGLHandle(), 0,
+								glParamBlockBuffer->getSize());
+						}
 					}
 				}
-				else
-				{
-					mImageInfos[imageUnit].uniformIdx = (UINT32)-1;
-					glBindImageTexture(imageUnit, 0, 0, false, 0, GL_READ_WRITE, GL_R32F);
-				}
 			}
+			bs_frame_clear();
+
+			activateGLTextureUnit(0);
 		};
 
 		if (commandBuffer == nullptr)
-			executeRef(gptype, unit, buffer, loadStore);
+			executeRef(gpuParams);
 		else
 		{
-			auto execute = [=]() { executeRef(gptype, unit, buffer, loadStore); };
+			auto execute = [=]() { executeRef(gpuParams); };
 
 			SPtr<GLCommandBuffer> cb = std::static_pointer_cast<GLCommandBuffer>(commandBuffer);
 			cb->queueCommand(execute);
 		}
 
-		BS_INC_RENDER_STAT(NumTextureBinds);
+		BS_INC_RENDER_STAT(NumGpuParamBinds);
 	}
 
 	void GLRenderAPI::setStencilRef(UINT32 stencilRefValue, const SPtr<CommandBuffer>& commandBuffer)
@@ -1356,42 +1326,23 @@ namespace BansheeEngine
 
 	void GLRenderAPI::setTextureAddressingMode(UINT16 unit, const UVWAddressingMode& uvw)
 	{
-		if (!activateGLTextureUnit(unit))
-			return;
-
-		glTexParameteri(mTextureInfos[unit].type, GL_TEXTURE_WRAP_S,
-			getTextureAddressingMode(uvw.u));
-		glTexParameteri(mTextureInfos[unit].type, GL_TEXTURE_WRAP_T,
-			getTextureAddressingMode(uvw.v));
-		glTexParameteri(mTextureInfos[unit].type, GL_TEXTURE_WRAP_R,
-			getTextureAddressingMode(uvw.w));
-		activateGLTextureUnit(0);
+		// Sets the U/V/W wrap modes through glTexParameteri, using the texture target recorded for 'unit' in
+		// mTextureInfos. This method no longer activates 'unit' itself and does not reset the active unit to 0:
+		// the caller (setGpuParams()) calls activateGLTextureUnit(unit) before invoking it.
+		glTexParameteri(mTextureInfos[unit].type, GL_TEXTURE_WRAP_S, getTextureAddressingMode(uvw.u));
+		glTexParameteri(mTextureInfos[unit].type, GL_TEXTURE_WRAP_T, getTextureAddressingMode(uvw.v));
+		glTexParameteri(mTextureInfos[unit].type, GL_TEXTURE_WRAP_R, getTextureAddressingMode(uvw.w));
 	}
 
-	void GLRenderAPI::setTextureBorderColor(UINT16 unit, const Color& colour)
+	void GLRenderAPI::setTextureBorderColor(UINT16 unit, const Color& color)
 	{
-		GLfloat border[4] = { colour.r, colour.g, colour.b, colour.a };
-		if (activateGLTextureUnit(unit))
-		{
-			glTexParameterfv(mTextureInfos[unit].type, GL_TEXTURE_BORDER_COLOR, border);
-			activateGLTextureUnit(0);
-		}
+		// Sets GL_TEXTURE_BORDER_COLOR through glTexParameterfv, using the texture target recorded for 'unit' in
+		// mTextureInfos. This method no longer activates 'unit' itself and does not reset the active unit to 0:
+		// the caller (setGpuParams()) calls activateGLTextureUnit(unit) before invoking it.
+		GLfloat border[4] = { color.r, color.g, color.b, color.a };
+		glTexParameterfv(mTextureInfos[unit].type, GL_TEXTURE_BORDER_COLOR, border);
 	}
 
 	void GLRenderAPI::setTextureMipmapBias(UINT16 unit, float bias)
 	{
-		if (mCurrentCapabilities->hasCapability(RSC_MIPMAP_LOD_BIAS))
-		{
-			if (activateGLTextureUnit(unit))
-			{
-				glTexParameterf(mTextureInfos[unit].type, GL_TEXTURE_LOD_BIAS, bias);
-				activateGLTextureUnit(0);
-			}
-		}
-
+		// Sets GL_TEXTURE_LOD_BIAS through glTexParameterf, using the texture target recorded for 'unit' in
+		// mTextureInfos. The RSC_MIPMAP_LOD_BIAS capability is no longer checked here, and this method no longer
+		// activates 'unit' itself: the caller (setGpuParams()) calls activateGLTextureUnit(unit) before invoking it.
+		glTexParameterf(mTextureInfos[unit].type, GL_TEXTURE_LOD_BIAS, bias);
 	}
 
-	void GLRenderAPI::setSceneBlending(BlendFactor sourceFactor, BlendFactor destFactor, BlendOperation op )
+	void GLRenderAPI::setSceneBlending(BlendFactor sourceFactor, BlendFactor destFactor, BlendOperation op)
 	{
 		GLint sourceBlend = getBlendMode(sourceFactor);
 		GLint destBlend = getBlendMode(destFactor);
@@ -1751,9 +1702,6 @@ namespace BansheeEngine
 
 	void GLRenderAPI::setTextureFiltering(UINT16 unit, FilterType ftype, FilterOptions fo)
 	{
-		if (!activateGLTextureUnit(unit))
-			return;
-
 		switch(ftype)
 		{
 		case FT_MIN:
@@ -1782,18 +1730,10 @@ namespace BansheeEngine
 			glTexParameteri(mTextureInfos[unit].type, GL_TEXTURE_MIN_FILTER, getCombinedMinMipFilter());
 			break;
 		}
-
-		activateGLTextureUnit(0);
 	}
 
 	void GLRenderAPI::setTextureAnisotropy(UINT16 unit, UINT32 maxAnisotropy)
 	{
-		if (!mCurrentCapabilities->hasCapability(RSC_ANISOTROPY))
-			return;
-
-		if (!activateGLTextureUnit(unit))
-			return;
-
 		GLfloat maxSupportAnisotropy = 0;
 		glGetFloatv(GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &maxSupportAnisotropy);
 		if (maxAnisotropy > maxSupportAnisotropy)
@@ -1805,8 +1745,6 @@ namespace BansheeEngine
 
 		if (getCurrentAnisotropy(unit) != maxAnisotropy)
 			glTexParameterf(mTextureInfos[unit].type, GL_TEXTURE_MAX_ANISOTROPY_EXT, (float)maxAnisotropy);
-
-		activateGLTextureUnit(0);
 	}
 
 	bool GLRenderAPI::activateGLTextureUnit(UINT16 unit)
@@ -2064,70 +2002,6 @@ namespace BansheeEngine
 		return primType;
 	}
 
 	/**
 	 * Removed by this change. For reference, the deleted helper picked a GL texture unit for a sampler index:
 	 *  - domain and hull programs were rejected with an error;
 	 *  - units in [mTextureUnitOffsets[gptype], mMaxBoundTexUnits[gptype]) were scanned and the first one whose
 	 *    samplerIdx equalled @p samplerIdx, or was unassigned ((UINT32)-1), was returned;
 	 *  - otherwise mMaxBoundTexUnits[gptype] was returned if the stage had fewer bound units than
 	 *    getNumTextureUnits(gptype) allows.
 	 * Every failure path logged an error and returned 0, which callers could not tell apart from a valid unit 0.
 	 */
-	UINT32 GLRenderAPI::getGLTextureUnit(GpuProgramType gptype, UINT32 samplerIdx)
-	{
-		if (gptype == GPT_DOMAIN_PROGRAM || gptype == GPT_HULL_PROGRAM)
-		{
-			LOGERR("OpenGL cannot assign textures to this gpu program type: " + toString(gptype));
-			return 0;
-		}
-
-		UINT32 offset = mTextureUnitOffsets[gptype];
-		for(UINT32 i = offset; i < mMaxBoundTexUnits[gptype]; i++)
-		{
-			UINT32 texUnit = i;
-
-			if (mTextureInfos[texUnit].samplerIdx == samplerIdx || mTextureInfos[texUnit].samplerIdx == (UINT32)-1)
-				return texUnit;
-		}
-
-		INT32 numSupportedUnits = (INT32)mCurrentCapabilities->getNumTextureUnits(gptype);
-		INT32 numBoundTexUnits = (INT32)mMaxBoundTexUnits[gptype] - (INT32)offset;
-		if (numBoundTexUnits < numSupportedUnits)
-			return mMaxBoundTexUnits[gptype];
-
-		LOGERR("Cannot find an empty slot to bind a texture to.");
-		return 0;
-	}
-
 	/**
 	 * Removed by this change. For reference, the deleted helper picked a GL image unit for a load-store texture:
 	 *  - only fragment and compute programs were accepted, any other stage logged an error;
 	 *  - units in [0, mMaxBoundImageUnits[gptype]) were scanned and the first one whose uniformIdx equalled
 	 *    @p uniformIdx, or was unassigned ((UINT32)-1), was returned;
 	 *  - otherwise mMaxBoundImageUnits[gptype] was returned if it was below
 	 *    getNumLoadStoreTextureUnits(gptype).
 	 * Every failure path logged an error and returned 0, which callers could not tell apart from a valid unit 0.
 	 */
-	UINT32 GLRenderAPI::getGLImageUnit(GpuProgramType gptype, UINT32 uniformIdx)
-	{
-		if (gptype != GPT_FRAGMENT_PROGRAM && gptype != GPT_COMPUTE_PROGRAM)
-		{
-			LOGERR("OpenGL cannot assign load-store textures to this gpu program type: " + toString(gptype));
-			return 0;
-		}
-
-		for (UINT32 i = 0; i < mMaxBoundImageUnits[gptype]; i++)
-		{
-			UINT32 imageUnit = i;
-
-			if (mImageInfos[imageUnit].uniformIdx == uniformIdx || mImageInfos[imageUnit].uniformIdx == (UINT32)-1)
-				return imageUnit;
-		}
-
-		UINT32 numSupportedUnits = mCurrentCapabilities->getNumLoadStoreTextureUnits(gptype);
-		UINT32 numBoundImageUnits = mMaxBoundImageUnits[gptype];
-		if (numBoundImageUnits < numSupportedUnits)
-			return mMaxBoundImageUnits[gptype];
-
-		LOGERR("Cannot find an empty slot to bind a load-store texture to.");
-		return 0;
-	}
-
 	/**
 	 * Removed by this change. For reference, the deleted helper translated a per-stage uniform block binding into
 	 * a global one by returning mUBOffsets[gptype] + binding, after checking that @p binding was below
 	 * getNumGpuParamBlockBuffers(gptype). An out-of-range binding logged an error and returned 0.
 	 *
 	 * Note that maxNumBindings is unsigned, so the "maxNumBindings - 1" in the error message would wrap around if
 	 * the stage reported zero supported buffers.
 	 */
-	UINT32 GLRenderAPI::getGLUniformBlockBinding(GpuProgramType gptype, UINT32 binding)
-	{
-		UINT32 maxNumBindings = mCurrentCapabilities->getNumGpuParamBlockBuffers(gptype);
-		if (binding >= maxNumBindings)
-		{
-			LOGERR("Invalid buffer binding for the provided stage. Buffer binding: " + toString(binding) + ". Stage: " +
-				toString(gptype) + ". Supported range is 0 .. " + toString(maxNumBindings - 1));
-			return 0;
-		}
-
-		return mUBOffsets[gptype] + binding;
-	}
-
 	SPtr<GLSLGpuProgramCore> GLRenderAPI::getActiveProgram(GpuProgramType gptype) const
 	{
 		switch (gptype)
@@ -2192,16 +2066,8 @@ namespace BansheeEngine
 		}
 
 		UINT32 curTexUnitOffset = 0;
-		UINT32 curUBOffset = 0;
 		for (UINT32 i = 0; i < 6; i++)
-		{
-			mTextureUnitOffsets[i] = curTexUnitOffset;
-			mMaxBoundTexUnits[i] = curTexUnitOffset;
-			mUBOffsets[i] = curUBOffset;
-
 			curTexUnitOffset += caps->getNumTextureUnits((GpuProgramType)i);
-			curUBOffset += caps->getNumGpuParamBlockBuffers((GpuProgramType)i);
-		}
 
 		UINT32 totalNumTexUnits = curTexUnitOffset;
 		UINT16 numCombinedTexUnits = caps->getNumCombinedTextureUnits();
@@ -2212,22 +2078,8 @@ namespace BansheeEngine
 		mNumTextureUnits = numCombinedTexUnits;
 		mTextureInfos = bs_newN<TextureInfo>(mNumTextureUnits);
 		for (UINT16 i = 0; i < mNumTextureUnits; i++)
-		{
-			mTextureInfos[i].samplerIdx = (UINT32)-1;
 			mTextureInfos[i].type = GL_TEXTURE_2D;
-		}
-
-		UINT32 totalNumUniformBlocks = curUBOffset;
-		UINT16 numCombinedUniformBlocks = caps->getNumCombinedGpuParamBlockBuffers();
-
-		if(totalNumUniformBlocks > numCombinedUniformBlocks)
-			BS_EXCEPT(InternalErrorException, "Number of combined uniform block buffers less than the number of individual per-stage buffers!?");
-
-		mNumImageUnits = caps->getNumCombinedLoadStoreTextureUnits();
-		mImageInfos = bs_newN<ImageInfo>(mNumImageUnits);
-		for (UINT16 i = 0; i < mNumImageUnits; i++)
-			mImageInfos[i].uniformIdx = (UINT32)-1;
-
+		
 		TextureManager::startUp<GLTextureManager>(std::ref(*mGLSupport));
 		TextureCoreManager::startUp<GLTextureCoreManager>(std::ref(*mGLSupport));
 	}