Преглед изворни кода

Preparing documentation for Doxygen generation

BearishSun пре 10 година
родитељ
комит
70d6e36394

+ 58 - 57
BansheeCore/Include/BsEventQuery.h

@@ -1,58 +1,59 @@
-#pragma once
-
-#include "BsCorePrerequisites.h"
-
-namespace BansheeEngine
-{
-	/**
-	 * @brief	Represents a GPU query that gets triggered when GPU starts processing the query.
-	 * 			
-	 * @note	Normally GPU will have many commands in its command buffer. When EventQuery::begin is called it is placed
-	 * 			in that command buffer. Once the buffer empties and GPU reaches the EventQuery command, the query
-	 * 			callback is triggered.
-	 * 			
-	 *			Core thread only.
-	 */
-	class BS_CORE_EXPORT EventQuery
-	{
-	public:
-		EventQuery()
-			:mActive(false) {}
-		virtual ~EventQuery() {}
-
-		/**
-		 * @brief	Starts the query. 
-		 * 			
-		 * @note	Once the query is started you may poll "isReady" method to check when query has finished,
-		 * 			or you may hook up an "onTriggered" callback and be notified that way.
-		 */
-		virtual void begin() = 0;
-
-		/**
-		 * @brief	Check if GPU has processed the query.
-		 */
-		virtual bool isReady() const = 0;
-
-		/**
-		 * @brief	Triggered when GPU starts processing the query.
-		 */
-		Event<void()> onTriggered;
-
-		/**
-		 * @brief	Creates a new query, but does not schedule it on GPU.
-		 */
-		static EventQueryPtr create();
-
-	protected:
-		friend class QueryManager;
-
-		/**
-		 * @brief	Returns true if the has still not been completed by the GPU.
-		 */
-		bool isActive() const { return mActive; }
-		void setActive(bool active) { mActive = active; }
-
-	protected:
-		bool mActive;
-	};
+#pragma once
+
+#include "BsCorePrerequisites.h"
+
+namespace BansheeEngine
+{
+	/** @cond INTERNAL */
+	/** @addtogroup RenderAPI
+	 *  @{
+	 */
+
+	/**
+	 * Represents a GPU query that gets triggered when GPU starts processing the query.
+	 * 			
+	 * @note	
+	 * Normally GPU will have many commands in its command buffer. When begin() is called it is placed in that command 
+	 * buffer. Once the buffer empties and GPU reaches the EventQuery command, the query callback is triggered.
+	 * @note			
+	 * Core thread only.
+	 */
+	class BS_CORE_EXPORT EventQuery
+	{
+	public:
+		EventQuery()
+			:mActive(false) {}
+		virtual ~EventQuery() {}
+
+		/**
+		 * Starts the query. 
+		 * 			
+		 * @note	
+		 * Once the query is started you may poll isReady() method to check when query has finished, or you may hook up 
+		 * an ::onTriggered callback and be notified that way.
+		 */
+		virtual void begin() = 0;
+
+		/** Check if GPU has processed the query. */
+		virtual bool isReady() const = 0;
+
+		/**	Triggered when GPU starts processing the query. */
+		Event<void()> onTriggered;
+
+		/**	Creates a new query, but does not schedule it on GPU. */
+		static EventQueryPtr create();
+
+	protected:
+		friend class QueryManager;
+
+		/**	Returns true if the query has still not been completed by the GPU. */
+		bool isActive() const { return mActive; }
+		void setActive(bool active) { mActive = active; }
+
+	protected:
+		bool mActive;
+	};
+
+	/** @} */
+	/** @endcond */
 }

+ 215 - 238
BansheeCore/Include/BsGpuBuffer.h

@@ -1,238 +1,215 @@
-#pragma once
-
-#include "BsCorePrerequisites.h"
-#include "BsGpuBufferView.h"
-#include "BsCoreObject.h"
-
-namespace BansheeEngine 
-{
-	/**
-	 * @brief	Information about a GPU buffer.
-	 */
-	class BS_CORE_EXPORT GpuBufferProperties
-	{
-	public:
-		GpuBufferProperties(UINT32 elementCount, UINT32 elementSize, GpuBufferType type, 
-			GpuBufferUsage usage, bool randomGpuWrite, bool useCounter);
-
-		/**
-		 * @brief	Returns the type of the GPU buffer. Type determines which kind of views (if any) can be created
-		 *			for the buffer, and how is data read or modified in it.
-		 */
-		GpuBufferType getType() const { return mType; }
-
-		/**
-		 * @brief	Returns buffer usage which determines how are planning on updating the buffer contents.
-		 */
-		GpuBufferUsage getUsage() const { return mUsage; }
-
-		/**
-		 * @brief	Return whether the buffer supports random reads and writes within the GPU programs.
-		 */
-		bool getRandomGpuWrite() const { return mRandomGpuWrite; }
-
-		/**
-		 * @brief	Returns whether the buffer supports counter use within GPU programs.
-		 */
-		bool getUseCounter() const { return mUseCounter; }
-
-		/**
-		 * @brief	Returns number of elements in the buffer.
-		 */
-		UINT32 getElementCount() const { return mElementCount; }
-
-		/**
-		 * @brief	Returns size of a single element in the buffer in bytes.
-		 */
-		UINT32 getElementSize() const { return mElementSize; }
-
-	protected:
-		GpuBufferType mType;
-		GpuBufferUsage mUsage;
-		bool mRandomGpuWrite;
-		bool mUseCounter;
-		UINT32 mElementCount;
-		UINT32 mElementSize;
-	};
-
-	/**
-	 * @brief	Core thread version of a GpuBuffer.
-	 *
-	 * @see		GpuBuffer
-	 *
-	 * @note	Core thread only.
-	 */
-	class BS_CORE_EXPORT GpuBufferCore : public CoreObjectCore
-	{
-	public:
-		virtual ~GpuBufferCore();
-
-		/**
-		 * @brief	Locks the buffer returning a pointer to the internal buffer data that you may then
-		 *			read or write to. Caller must ensure it will only perform actions promised in the
-		 *			provided GPU lock options parameter.
-		 *
-		 * @param	offset	Number of bytes at which to lock the buffer. Returned pointer points to this location.
-		 * @param	length	Number of bytes to lock.
-		 * @param	options How to lock the buffer. Certain options offer better performance than others.
-		 */
-		virtual void* lock(UINT32 offset, UINT32 length, GpuLockOptions options) = 0;
-
-		/**
-		 * @brief	Unlocks a previously locked buffer. Any pointers to internal buffers returned when
-		 *			it was locked will become invalid.
-		 */
-		virtual void unlock() = 0;
-
-		/**
-		 * @brief	Reads buffer data into the previously allocated buffer.
-		 *
-		 * @param	offset	Number of bytes at which to start reading the buffer.
-		 * @param	length	Number of bytes to read.
-		 * @param	pDest	Previously allocated buffer of "length" bytes size.
-		 */
-        virtual void readData(UINT32 offset, UINT32 length, void* pDest) = 0;
-
-		/**
-		* @brief	Writes data into the buffer.
-		*
-		* @param	offset		Number of bytes at which to start writing to the buffer.
-		* @param	length		Number of bytes to write.
-		* @param	pDest		Previously allocated buffer used to retrieve the data from.
-		* @param	writeFlags  Flags that may be used to improve performance for specific use cases.
-		*/
-        virtual void writeData(UINT32 offset, UINT32 length, const void* pSource, BufferWriteType writeFlags = BufferWriteType::Normal) = 0;
-
-		/**
-		 * @brief	Copies data from another buffer into this buffer.
-		 *
-		 * @param	srcBuffer			Buffer to copy the data from.
-		 * @param	srcOffset			Offset in bytes into the source buffer - this is where reading starts from.
-		 * @param	dstOffset			Offset in bytes into the destination buffer - this is where writing starts from.
-		 * @param	length				Number of bytes to copy from source to destination.
-		 * @param	discardWholeBuffer	If true, the contents of the current buffer will be entirely discarded. This can improve
-		 *								performance if you know you wont be needing that data any more.
-		 */
-		virtual void copyData(GpuBufferCore& srcBuffer, UINT32 srcOffset,
-			UINT32 dstOffset, UINT32 length, bool discardWholeBuffer = false) = 0;
-
-		/**
-		 * @brief	Returns properties describing the buffer.
-		 */
-		const GpuBufferProperties& getProperties() const { return mProperties; }
-
-		/**
-		 * @brief	Creates a buffer view that may be used for binding a buffer to a slot in the pipeline. Views allow you to specify
-		 *			how is data in the buffer organized to make it easier for the pipeline to interpret.
-		 *
-		 * @param	buffer			Buffer to create the view for.
-		 * @param	firstElement	Position of the first element visible by the view.
-		 * @param	elementWidth	Width of one element in bytes.
-		 * @param	numElements		Number of elements in the buffer.
-		 * @param	useCounter		Should the buffer allow use of a counter. This is only relevant for random read write buffers.
-		 * @param	usage			Determines type of the view we are creating, and which slots in the pipeline will the view be bindable to.
-		 *
-		 * @note	If a view with this exact parameters already exists, it will be returned and new one will not be created.
-		 *
-		 *			Only Default and RandomWrite views are supported for this type of buffer. 
-		 *			TODO Low Priority: Perhaps reflect this limitation by having an enum with only
-		 *			those two options?
-		 */
-		static GpuBufferView* requestView(const SPtr<GpuBufferCore>& buffer, UINT32 firstElement, UINT32 elementWidth, UINT32 numElements, bool useCounter, GpuViewUsage usage);
-
-		/**
-		 * @brief	Releases a view created with requestView. 
-		 *
-		 * @note	View will only truly get released once all references to it are released.
-		 */
-		static void releaseView(GpuBufferView* view);
-
-	protected:
-		GpuBufferCore(UINT32 elementCount, UINT32 elementSize, GpuBufferType type, 
-			GpuBufferUsage usage, bool randomGpuWrite = false, bool useCounter = false);
-
-		/**
-		 * @brief	Creates an empty view for the current buffer.
-		 */
-		virtual GpuBufferView* createView() = 0;
-
-		/**
-		 * @brief	Destroys a view previously created for this buffer.
-		 */
-		virtual void destroyView(GpuBufferView* view) = 0;
-
-		/**
-		 * @brief	Destroys all buffer views regardless if their reference count is
-		 *			zero or not.
-		 */
-		void clearBufferViews();
-
-		/**
-		 * @brief	Helper class to help with reference counting for GPU buffer views.
-		 */
-		struct GpuBufferReference
-		{
-			GpuBufferReference(GpuBufferView* _view)
-				:view(_view), refCount(0)
-			{ }
-
-			GpuBufferView* view;
-			UINT32 refCount;
-		};
-
-		UnorderedMap<GPU_BUFFER_DESC, GpuBufferReference*, GpuBufferView::HashFunction, GpuBufferView::EqualFunction> mBufferViews;
-		GpuBufferProperties mProperties;
-	};
-
-	/**
-	 * @brief	Handles a generic GPU buffer that you may use for storing any kind of data you wish to be accessible
-	 *			to the GPU. These buffers may be bounds to GPU program binding slots and accessed from a GPU program,
-	 *			or may be used by fixed pipeline in some way.
-	 *
-	 *			Buffer types:
-	 *			  - Raw buffers containing a block of bytes that are up to the GPU program to interpret.
-	 *			  - Structured buffer containing an array of structures compliant to a certain layout. Similar to raw
-	 *				buffer but easier to interpret the data.
-	 *			  - Random read/write buffers that allow you to write to random parts of the buffer from within
-	 *				the GPU program, and then read it later. These can only be bound to pixel and compute stages.
-	 *			  - Append/Consume buffers also allow you to write to them, but in a stack-like fashion, usually where one set
-	 *				of programs produces data while other set consumes it from the same buffer. Append/Consume buffers are structured
-	 *				by default.
-	 *
-	 * @note	Sim thread only.
-	 */
-	class BS_CORE_EXPORT GpuBuffer : public CoreObject
-    {
-    public:
-		virtual ~GpuBuffer() { }
-
-		/**
-		 * @brief	Returns properties describing the buffer.
-		 */
-		const GpuBufferProperties& getProperties() const { return mProperties; }
-
-		/**
-		 * @brief	Retrieves a core implementation of a GPU buffer usable only from the
-		 *			core thread.
-		 */
-		SPtr<GpuBufferCore> getCore() const;
-
-	protected:
-		friend class HardwareBufferManager;
-
-		GpuBuffer(UINT32 elementCount, UINT32 elementSize, GpuBufferType type, GpuBufferUsage usage, 
-			bool randomGpuWrite = false, bool useCounter = false);
-
-		/**
-		 * @copydoc	CoreObject::createCore
-		 */
-		SPtr<CoreObjectCore> createCore() const override;
-
-		/**
-		 * @copydoc	HardwareBufferManager::createGpuParamBlockBuffer
-		 */
-		static GpuParamBlockBufferPtr create(UINT32 size, GpuParamBlockUsage usage = GPBU_DYNAMIC);
-
-		GpuBufferProperties mProperties;
-    };
-}
+#pragma once
+
+#include "BsCorePrerequisites.h"
+#include "BsGpuBufferView.h"
+#include "BsCoreObject.h"
+
+namespace BansheeEngine 
+{
+	/** @addtogroup RenderAPI
+	 *  @{
+	 */
+
+	/** 
+	 * Information about a GpuBuffer. Allows core and non-core versions of GpuBuffer to share the same structure for 
+	 * properties. 
+	 */
+	class BS_CORE_EXPORT GpuBufferProperties
+	{
+	public:
+		GpuBufferProperties(UINT32 elementCount, UINT32 elementSize, GpuBufferType type, 
+			GpuBufferUsage usage, bool randomGpuWrite, bool useCounter);
+
+		/**
+		 * Returns the type of the GPU buffer. Type determines which kind of views (if any) can be created for the buffer, 
+		 * and how data is read or modified in it.
+		 */
+		GpuBufferType getType() const { return mType; }
+
+		/** Returns buffer usage which determines how we are planning on updating the buffer contents. */
+		GpuBufferUsage getUsage() const { return mUsage; }
+
+		/** Returns whether the buffer supports random reads and writes within the GPU programs. */
+		bool getRandomGpuWrite() const { return mRandomGpuWrite; }
+
+		/**	Returns whether the buffer supports counter use within GPU programs. */
+		bool getUseCounter() const { return mUseCounter; }
+
+		/**	Returns number of elements in the buffer. */
+		UINT32 getElementCount() const { return mElementCount; }
+
+		/**	Returns size of a single element in the buffer in bytes. */
+		UINT32 getElementSize() const { return mElementSize; }
+
+	protected:
+		GpuBufferType mType;
+		GpuBufferUsage mUsage;
+		bool mRandomGpuWrite;
+		bool mUseCounter;
+		UINT32 mElementCount;
+		UINT32 mElementSize;
+	};
+
+	/** @cond INTERNAL */
+
+	/**
+	 * Core thread version of a GpuBuffer.
+	 *
+	 * @note	Core thread only.
+	 */
+	class BS_CORE_EXPORT GpuBufferCore : public CoreObjectCore
+	{
+	public:
+		virtual ~GpuBufferCore();
+
+		/**
+		 * Locks the buffer returning a pointer to the internal buffer data that you may then read or write to. 
+		 * Caller must ensure it will only perform actions promised in the provided GPU lock options parameter.
+		 *
+		 * @param[in]	offset	Number of bytes at which to lock the buffer. Returned pointer points to this location.
+		 * @param[in]	length	Number of bytes to lock.
+		 * @param[in]	options How to lock the buffer. Certain options offer better performance than others.
+		 */
+		virtual void* lock(UINT32 offset, UINT32 length, GpuLockOptions options) = 0;
+
+		/**
+		 * Unlocks a previously locked buffer. Any pointers to internal buffers returned when it was locked will become 
+		 * invalid.
+		 */
+		virtual void unlock() = 0;
+
+		/**
+		 * Reads buffer data into the previously allocated buffer.
+		 *
+		 * @param[in]	offset	Number of bytes at which to start reading the buffer.
+		 * @param[in]	length	Number of bytes to read.
+		 * @param[out]	pDest	Previously allocated buffer of @p length bytes size.
+		 */
+        virtual void readData(UINT32 offset, UINT32 length, void* pDest) = 0;
+
+		/**
+		 * Writes data into the buffer.
+		 *
+		 * @param[in]	offset		Number of bytes at which to start writing to the buffer.
+		 * @param[in]	length		Number of bytes to write.
+		 * @param[in]	pSource		Previously allocated buffer used to retrieve the data from.
+		 * @param[in]	writeFlags  Flags that may be used to improve performance for specific use cases.
+		 */
+        virtual void writeData(UINT32 offset, UINT32 length, const void* pSource, BufferWriteType writeFlags = BufferWriteType::Normal) = 0;
+
+		/**
+		 * Copies data from another buffer into this buffer.
+		 *
+		 * @param[in]	srcBuffer			Buffer to copy the data from.
+		 * @param[in]	srcOffset			Offset in bytes into the source buffer - this is where reading starts from.
+		 * @param[in]	dstOffset			Offset in bytes into the destination buffer - this is where writing starts from.
+		 * @param[in]	length				Number of bytes to copy from source to destination.
+		 * @param[in]	discardWholeBuffer	If true, the contents of the current buffer will be entirely discarded. This can
+		 *									improve performance if you know you won't be needing that data any more.
+		 */
+		virtual void copyData(GpuBufferCore& srcBuffer, UINT32 srcOffset,
+			UINT32 dstOffset, UINT32 length, bool discardWholeBuffer = false) = 0;
+
+		/** Returns properties describing the buffer. */
+		const GpuBufferProperties& getProperties() const { return mProperties; }
+
+		/**
+		 * Creates a buffer view that may be used for binding a buffer to a slot in the pipeline. Views allow you to specify
+		 * how data in the buffer is organized to make it easier for the pipeline to interpret.
+		 *
+		 * @param[in]	buffer			Buffer to create the view for.
+		 * @param[in]	firstElement	Position of the first element visible by the view.
+		 * @param[in]	elementWidth	Width of one element in bytes.
+		 * @param[in]	numElements		Number of elements in the buffer.
+		 * @param[in]	useCounter		Should the buffer allow use of a counter. This is only relevant for random read write buffers.
+		 * @param[in]	usage			Determines type of the view we are creating, and which slots in the pipeline will the view be bindable to.
+		 *
+		 * @note If a view with these exact parameters already exists, it will be returned and a new one will not be created.
+		 * @note Only Default and RandomWrite views are supported for this type of buffer. 
+		 */
+		// TODO Low Priority: Perhaps reflect usage flag limitation by having an enum with only the supported two options?
+		static GpuBufferView* requestView(const SPtr<GpuBufferCore>& buffer, UINT32 firstElement, UINT32 elementWidth, 
+			UINT32 numElements, bool useCounter, GpuViewUsage usage);
+
+		/**
+		 * Releases a view created with requestView(). 
+		 *
+		 * @note	View will only truly get released once all references to it are released.
+		 */
+		static void releaseView(GpuBufferView* view);
+
+	protected:
+		GpuBufferCore(UINT32 elementCount, UINT32 elementSize, GpuBufferType type, 
+			GpuBufferUsage usage, bool randomGpuWrite = false, bool useCounter = false);
+
+		/** Creates an empty view for the current buffer. */
+		virtual GpuBufferView* createView() = 0;
+
+		/**	Destroys a view previously created for this buffer. */
+		virtual void destroyView(GpuBufferView* view) = 0;
+
+		/**	Destroys all buffer views regardless if their reference count is zero or not. */
+		void clearBufferViews();
+
+		/**	Helper class to help with reference counting for GPU buffer views. */
+		struct GpuBufferReference
+		{
+			GpuBufferReference(GpuBufferView* _view)
+				:view(_view), refCount(0)
+			{ }
+
+			GpuBufferView* view;
+			UINT32 refCount;
+		};
+
+		UnorderedMap<GPU_BUFFER_DESC, GpuBufferReference*, GpuBufferView::HashFunction, GpuBufferView::EqualFunction> mBufferViews;
+		GpuBufferProperties mProperties;
+	};
+
+	/** @endcond */
+
+	/**
+	 * Handles a generic GPU buffer that you may use for storing any kind of data you wish to be accessible to the GPU.
+	 * These buffers may be bound to GPU program binding slots and accessed from a GPU program, or may be used by fixed 
+	 * pipeline in some way.
+	 *
+	 * Buffer types:
+	 *  - Raw buffers containing a block of bytes that are up to the GPU program to interpret.
+	 *	- Structured buffer containing an array of structures compliant to a certain layout. Similar to raw buffer but 
+	 *    easier to interpret the data.
+	 *	- Random read/write buffers that allow you to write to random parts of the buffer from within the GPU program, and 
+	 *    then read it later. These can only be bound to pixel and compute stages.
+	 *	- Append/Consume buffers also allow you to write to them, but in a stack-like fashion, usually where one set of 
+	 *    programs produces data while other set consumes it from the same buffer. Append/Consume buffers are structured
+	 *	  by default.
+	 *
+	 * @note	Sim thread only.
+	 */
+	class BS_CORE_EXPORT GpuBuffer : public CoreObject
+    {
+    public:
+		virtual ~GpuBuffer() { }
+
+		/** Returns properties describing the buffer. */
+		const GpuBufferProperties& getProperties() const { return mProperties; }
+
+		/** Retrieves a core implementation of a GPU buffer usable only from the core thread. */
+		SPtr<GpuBufferCore> getCore() const;
+
+	protected:
+		friend class HardwareBufferManager;
+
+		GpuBuffer(UINT32 elementCount, UINT32 elementSize, GpuBufferType type, GpuBufferUsage usage, 
+			bool randomGpuWrite = false, bool useCounter = false);
+
+		/** @copydoc CoreObject::createCore */
+		SPtr<CoreObjectCore> createCore() const override;
+
+		/** @copydoc HardwareBufferManager::createGpuParamBlockBuffer */
+		static GpuParamBlockBufferPtr create(UINT32 size, GpuParamBlockUsage usage = GPBU_DYNAMIC);
+
+		GpuBufferProperties mProperties;
+    };
+
+	/** @} */
+}

+ 86 - 98
BansheeCore/Include/BsGpuBufferView.h

@@ -1,99 +1,87 @@
-#pragma once
-
-#include "BsCorePrerequisites.h"
-
-namespace BansheeEngine
-{
-	/**
-	 * @brief	Descriptor structure used for initializing a GPU buffer view.
-	 *
-	 * @see		GpuBufferView
-	 * @see		GpuBuffer
-	 */
-	struct BS_CORE_EXPORT GPU_BUFFER_DESC
-	{
-		UINT32 firstElement;
-		UINT32 elementWidth;
-		UINT32 numElements;
-		bool useCounter;
-		GpuViewUsage usage;
-	};
-
-	/**
-	 * @brief	Holds information about a GPU buffer view. Views allow you to specify
-	 *			how is data in a buffer organized to make it easier for the pipeline to interpret.
-	 *			
-	 * @note	Buffers don't get bound to the pipeline directly, views do.
-	 *			Core thread only.
-	 *
-	 * @see		GpuBuffer
-	 */
-	class BS_CORE_EXPORT GpuBufferView
-	{
-	public:
-		class HashFunction
-		{
-		public:
-			size_t operator()(const GPU_BUFFER_DESC& key) const;
-		};
-
-		class EqualFunction
-		{
-		public:
-			bool operator()(const GPU_BUFFER_DESC& a, const GPU_BUFFER_DESC& b) const;
-		};
-
-		GpuBufferView();
-		virtual ~GpuBufferView();
-
-		/**
-		 * @brief	Initializes the view with the specified buffer and 
-		 *			a set of parameters describing the view to create.
-		 *			Must be called right after construction.
-		 */
-		virtual void initialize(const SPtr<GpuBufferCore>& buffer, GPU_BUFFER_DESC& desc);
-
-		/**
-		 * @brief	Returns a descriptor structure used for creating the view.
-		 */
-		const GPU_BUFFER_DESC& getDesc() const { return mDesc; }
-
-		/**
-		 * @brief	Returns the buffer this view was created for.
-		 */
-		SPtr<GpuBufferCore> getBuffer() const { return mBuffer; }
-
-		/**
-		 * @brief	Returns index of first element in the buffer that this view
-		 *			provides access to.
-		 */
-		UINT32 getFirstElement() const { return mDesc.firstElement; }
-
-		/**
-		 * @brief	Returns width of an element in the buffer, in bytes.
-		 */
-		UINT32 getElementWidth() const { return mDesc.elementWidth; }
-
-		/**
-		 * @brief	Returns the total number of elements this buffer provides
-		 *			access to.
-		 */
-		UINT32 getNumElements() const { return mDesc.numElements; }
-
-		/**
-		 * @brief	Returns true if this view allows a GPU program to use counters on
-		 *			the bound buffer.
-		 */
-		bool getUseCounter() const { return mDesc.useCounter; }
-
-		/**
-		 * @brief	Returns view usage which determines where in the pipeline 
-		 *			can the view be bound.
-		 */
-		GpuViewUsage getUsage() const { return mDesc.usage; }
-
-	protected:
-		GPU_BUFFER_DESC mDesc;
-		SPtr<GpuBufferCore> mBuffer;
-	};
+#pragma once
+
+#include "BsCorePrerequisites.h"
+
+namespace BansheeEngine
+{
+	/** @cond INTERNAL */
+	/** @addtogroup RenderAPI
+	 *  @{
+	 */
+
+	/**
+	 * Descriptor structure used for initializing a GpuBufferView.
+	 *
+	 * @see		GpuBuffer
+	 */
+	struct BS_CORE_EXPORT GPU_BUFFER_DESC
+	{
+		UINT32 firstElement;
+		UINT32 elementWidth;
+		UINT32 numElements;
+		bool useCounter;
+		GpuViewUsage usage;
+	};
+
+	/**
+	 * Holds information about a GPU buffer view. Views allow you to specify how data in a buffer is organized to make it 
+	 * easier for the pipeline to interpret.
+	 *			
+	 * @note	Buffers don't get bound to the pipeline directly, views do. 
+	 * @note	Core thread only.
+	 *
+	 * @see		GpuBuffer
+	 */
+	class BS_CORE_EXPORT GpuBufferView
+	{
+	public:
+		class HashFunction
+		{
+		public:
+			size_t operator()(const GPU_BUFFER_DESC& key) const;
+		};
+
+		class EqualFunction
+		{
+		public:
+			bool operator()(const GPU_BUFFER_DESC& a, const GPU_BUFFER_DESC& b) const;
+		};
+
+		GpuBufferView();
+		virtual ~GpuBufferView();
+
+		/**
+		 * Initializes the view with the specified buffer and a set of parameters describing the view to create. Must be 
+		 * called right after construction.
+		 */
+		virtual void initialize(const SPtr<GpuBufferCore>& buffer, GPU_BUFFER_DESC& desc);
+
+		/** Returns a descriptor structure used for creating the view. */
+		const GPU_BUFFER_DESC& getDesc() const { return mDesc; }
+
+		/**	Returns the buffer this view was created for. */
+		SPtr<GpuBufferCore> getBuffer() const { return mBuffer; }
+
+		/** Returns index of first element in the buffer that this view provides access to. */
+		UINT32 getFirstElement() const { return mDesc.firstElement; }
+
+		/** Returns width of an element in the buffer, in bytes. */
+		UINT32 getElementWidth() const { return mDesc.elementWidth; }
+
+		/**	Returns the total number of elements this view provides access to. */
+		UINT32 getNumElements() const { return mDesc.numElements; }
+
+		/**	Returns true if this view allows a GPU program to use counters on the bound buffer. */
+		bool getUseCounter() const { return mDesc.useCounter; }
+
+		/** Returns view usage which determines where in the pipeline the view can be bound. */
+		GpuViewUsage getUsage() const { return mDesc.usage; }
+
+	protected:
+		GPU_BUFFER_DESC mDesc;
+		SPtr<GpuBufferCore> mBuffer;
+	};
+
+	/** @} */
+	/** @endcond */
 }

+ 290 - 321
BansheeCore/Include/BsGpuParam.h

@@ -1,322 +1,291 @@
-#pragma once
-
-#include "BsCorePrerequisites.h"
-#include "BsVector2.h"
-#include "BsVector3.h"
-#include "BsVector4.h"
-#include "BsMatrix3.h"
-#include "BsMatrix4.h"
-#include "BsMatrixNxM.h"
-#include "BsColor.h"
-
-namespace BansheeEngine
-{
-	template<bool Core> struct TGpuParamsPtrType { };
-	template<> struct TGpuParamsPtrType<false> { typedef SPtr<GpuParams> Type; };
-	template<> struct TGpuParamsPtrType<true> { typedef SPtr<GpuParamsCore> Type; };
-
-	template<bool Core> struct TGpuParamTextureType { };
-	template<> struct TGpuParamTextureType < false > { typedef HTexture Type; };
-	template<> struct TGpuParamTextureType < true > { typedef SPtr<TextureCore> Type; };
-
-	template<bool Core> struct TGpuParamSamplerStateType { };
-	template<> struct TGpuParamSamplerStateType < false > { typedef SamplerStatePtr Type; };
-	template<> struct TGpuParamSamplerStateType < true > { typedef SPtr<SamplerStateCore> Type; };
-
-	/**
-	 * @brief	A handle that allows you to set a GpuProgram parameter. Internally keeps a reference to the 
-	 *			GPU parameter buffer and the necessary offsets. You should specialize this type for specific 
-	 *			parameter types. 
-	 *
-	 *			Object of this type must be returned by a Material. Setting/Getting parameter values will internally
-	 *			access a GPU parameter buffer attached to the Material this parameter was created from. Anything
-	 *			rendered with that material will then use those set values.
-	 * 			
-	 * @note	Normally you can set a GpuProgram parameter by calling various set/get methods on a Material.
-	 *			This class primarily used an as optimization in performance critical bits of code
-	 * 			where it is important to locate and set parameters quickly without any lookups
-	 *			(Mentioned set/get methods expect a parameter name). You just retrieve the handle 
-	 *			once and then set the parameter value many times with minimal performance impact.
-	 * 
-	 * @see		Material
-	 */
-	template<class T, bool Core>
-	class BS_CORE_EXPORT TGpuDataParam
-	{
-	private:
-		template<bool Core> struct TGpuParamBufferType { };
-		template<> struct TGpuParamBufferType < false > { typedef SPtr<GpuParamBlockBuffer> Type; };
-		template<> struct TGpuParamBufferType < true > { typedef SPtr<GpuParamBlockBufferCore> Type; };
-
-		typedef typename TGpuParamBufferType<Core>::Type GpuParamBufferType;
-		typedef typename TGpuParamsPtrType<Core>::Type GpuParamsType;
-
-		/**
-		 * @brief	Policy class that allows us to re-use this template class for matrices which might
-		 *			need transposing, and other types which do not. Matrix needs to be transposed for
-		 *			certain render systems depending on how they store them in memory.
-		 */
-		template<class Type>
-		struct TransposePolicy
-		{
-			static Type transpose(const Type& value) { return value; }
-			static bool transposeEnabled(bool enabled) { return false; }
-		};
-
-		/**
-		 * @brief	Transpose policy for 3x3 matrix.
-		 */
-		template<>
-		struct TransposePolicy<Matrix3>
-		{
-			static Matrix3 transpose(const Matrix3& value) { return value.transpose(); }
-			static bool transposeEnabled(bool enabled) { return enabled; }
-		};
-
-		/**
-		* @brief	Transpose policy for 4x4 matrix.
-		*/
-		template<>
-		struct TransposePolicy<Matrix4>
-		{
-			static Matrix4 transpose(const Matrix4& value) { return value.transpose(); }
-			static bool transposeEnabled(bool enabled) { return enabled; }
-		};
-
-		/**
-		 * @brief	Transpose policy for NxM matrix.
-		 */
-		template<int N, int M>
-		struct TransposePolicy<MatrixNxM<N, M>>
-		{
-			static MatrixNxM<N, M> transpose(const MatrixNxM<N, M>& value) { return value.transpose(); }
-			static bool transposeEnabled(bool enabled) { return enabled; }
-		};
-
-	public:
-		TGpuDataParam();
-		TGpuDataParam(GpuParamDataDesc* paramDesc, const GpuParamsType& parent);
-
-		/**
-		 * @brief	Sets a parameter value at the specified array index. If parameter does not
-		 *			contain an array leave the index at 0.
-		 *
-		 * @note	Like with all GPU parameters, the actual GPU buffer will not be updated until rendering
-		 *			with material this parameter was created from starts on the core thread.
-		 */
-		void set(const T& value, UINT32 arrayIdx = 0);
-
-		/**
-		 * @brief	Returns a value of a parameter at the specified array index. If parameter does not
-		 *			contain an array leave the index at 0.
-		 *
-		 * @note	No GPU reads are done. Data returned was cached when it was written. 
-		 */
-		T get(UINT32 arrayIdx = 0);
-
-		/**
-		 * @brief	Checks if param is initialized.
-		 */
-		bool operator==(const nullptr_t &nullval) const
-		{
-			return mParamDesc == nullptr;
-		}
-
-	protected:
-		GpuParamsType mParent;
-		GpuParamDataDesc* mParamDesc;
-	};
-
-	/**
-	 * @copydoc TGpuDataParam
-	 */
-	template<bool Core>
-	class BS_CORE_EXPORT TGpuParamStruct
-	{
-	public:
-		template<bool Core> struct TGpuParamBufferType { };
-		template<> struct TGpuParamBufferType < false > { typedef SPtr<GpuParamBlockBuffer> Type; };
-		template<> struct TGpuParamBufferType < true > { typedef SPtr<GpuParamBlockBufferCore> Type; };
-
-		typedef typename TGpuParamBufferType<Core>::Type GpuParamBufferType;
-		typedef typename TGpuParamsPtrType<Core>::Type GpuParamsType;
-
-		TGpuParamStruct();
-		TGpuParamStruct(GpuParamDataDesc* paramDesc, const GpuParamsType& parent);
-
-		/**
-		 * @copydoc	TGpuDataParam::set
-		 */
-		void set(const void* value, UINT32 sizeBytes, UINT32 arrayIdx = 0);
-
-		/**
-		 * @copydoc	TGpuDataParam::get
-		 */
-		void get(void* value, UINT32 sizeBytes, UINT32 arrayIdx = 0);
-
-		/**
-		 * @brief	Returns the size of the struct in bytes.
-		 */
-		UINT32 getElementSize() const;
-
-		/**
-		 * @brief	Checks if param is initialized.
-		 */
-		bool operator==(const nullptr_t &nullval) const
-		{
-			return mParamDesc == nullptr;
-		}
-
-	protected:
-		GpuParamsType mParent;
-		GpuParamDataDesc* mParamDesc;
-	};
-
-	/**
-	 * @copydoc TGpuObjectParam
-	 */
-	template<bool Core>
-	class BS_CORE_EXPORT TGpuParamTexture
-	{
-	private:
-		friend class GpuParams;
-		friend class GpuParamsCore;
-
-		typedef typename TGpuParamsPtrType<Core>::Type GpuParamsType;
-		typedef typename TGpuParamTextureType<Core>::Type TextureType;
-
-	public:
-		TGpuParamTexture();
-		TGpuParamTexture(GpuParamObjectDesc* paramDesc, const GpuParamsType& parent);
-
-		/**
-		 * @copydoc	TGpuDataParam::set
-		 */
-		void set(const TextureType& texture);
-
-		/**
-		 * @copydoc	TGpuDataParam::get
-		 */
-		TextureType get();
-
-		/**
-		 * @brief	Checks if param is initialized.
-		 */
-		bool operator==(const nullptr_t &nullval) const
-		{
-			return mParamDesc == nullptr;
-		}
-
-	protected:
-		GpuParamsType mParent;
-		GpuParamObjectDesc* mParamDesc;
-	};
-
-	/**
-	 * @copydoc TGpuObjectParam
-	 */
-	template<bool Core>
-	class BS_CORE_EXPORT TGpuParamLoadStoreTexture
-	{
-	private:
-		friend class GpuParams;
-		friend class GpuParamsCore;
-
-		typedef typename TGpuParamsPtrType<Core>::Type GpuParamsType;
-		typedef typename TGpuParamTextureType<Core>::Type TextureType;
-
-	public:
-		TGpuParamLoadStoreTexture();
-		TGpuParamLoadStoreTexture(GpuParamObjectDesc* paramDesc, const GpuParamsType& parent);
-
-		/**
-		 * @copydoc	TGpuDataParam::set
-		 */
-		void set(const TextureType& texture, const TextureSurface& surface);
-
-		/**
-		 * @copydoc	TGpuDataParam::get
-		 */
-		TextureType get();
-
-		/**
-		 * @brief	Checks if param is initialized.
-		 */
-		bool operator==(const nullptr_t &nullval) const
-		{
-			return mParamDesc == nullptr;
-		}
-
-	protected:
-		GpuParamsType mParent;
-		GpuParamObjectDesc* mParamDesc;
-	};
-
-	/**
-	 * @copydoc TGpuObjectParam
-	 */
-	template<bool Core>
-	class BS_CORE_EXPORT TGpuParamSampState
-	{
-	private:
-		friend class GpuParams;
-		friend class GpuParamsCore;
-
-		typedef typename TGpuParamsPtrType<Core>::Type GpuParamsType;
-		typedef typename TGpuParamSamplerStateType<Core>::Type SamplerStateType;
-
-	public:
-		TGpuParamSampState();
-		TGpuParamSampState(GpuParamObjectDesc* paramDesc, const GpuParamsType& parent);
-
-		/**
-		 * @copydoc	TGpuDataParam::set
-		 */
-		void set(const SamplerStateType& samplerState);
-
-		/**
-		 * @copydoc	TGpuDataParam::get
-		 */
-		SamplerStateType get();
-
-		/**
-		 * @brief	Checks if param is initialized.
-		 */
-		bool operator==(const nullptr_t &nullval) const
-		{
-			return mParamDesc == nullptr;
-		}
-
-	protected:
-		GpuParamsType mParent;
-		GpuParamObjectDesc* mParamDesc;
-	};
-
-	typedef TGpuDataParam<float, false> GpuParamFloat;
-	typedef TGpuDataParam<Color, false> GpuParamColor;
-	typedef TGpuDataParam<Vector2, false> GpuParamVec2;
-	typedef TGpuDataParam<Vector3, false> GpuParamVec3;
-	typedef TGpuDataParam<Vector4, false> GpuParamVec4;
-	typedef TGpuDataParam<Matrix3, false> GpuParamMat3;
-	typedef TGpuDataParam<Matrix4, false> GpuParamMat4;
-
-	typedef TGpuDataParam<float, true> GpuParamFloatCore;
-	typedef TGpuDataParam<Color, true> GpuParamColorCore;
-	typedef TGpuDataParam<Vector2, true> GpuParamVec2Core;
-	typedef TGpuDataParam<Vector3, true> GpuParamVec3Core;
-	typedef TGpuDataParam<Vector4, true> GpuParamVec4Core;
-	typedef TGpuDataParam<Matrix3, true> GpuParamMat3Core;
-	typedef TGpuDataParam<Matrix4, true> GpuParamMat4Core;
-
-	typedef TGpuParamStruct<false> GpuParamStruct;
-	typedef TGpuParamStruct<true> GpuParamStructCore;
-
-	typedef TGpuParamTexture<false> GpuParamTexture;
-	typedef TGpuParamTexture<true> GpuParamTextureCore;
-
-	typedef TGpuParamSampState<false> GpuParamSampState;
-	typedef TGpuParamSampState<true> GpuParamSampStateCore;
-
-	typedef TGpuParamLoadStoreTexture<false> GpuParamLoadStoreTexture;
-	typedef TGpuParamLoadStoreTexture<true> GpuParamLoadStoreTextureCore;
+#pragma once
+
+#include "BsCorePrerequisites.h"
+#include "BsVector2.h"
+#include "BsVector3.h"
+#include "BsVector4.h"
+#include "BsMatrix3.h"
+#include "BsMatrix4.h"
+#include "BsMatrixNxM.h"
+#include "BsColor.h"
+
+namespace BansheeEngine
+{
+	/** @addtogroup Implementation
+	 *  @{
+	 */
+
+	template<bool Core> struct TGpuParamsPtrType { };
+	template<> struct TGpuParamsPtrType<false> { typedef SPtr<GpuParams> Type; };
+	template<> struct TGpuParamsPtrType<true> { typedef SPtr<GpuParamsCore> Type; };
+
+	template<bool Core> struct TGpuParamTextureType { };
+	template<> struct TGpuParamTextureType < false > { typedef HTexture Type; };
+	template<> struct TGpuParamTextureType < true > { typedef SPtr<TextureCore> Type; };
+
+	template<bool Core> struct TGpuParamSamplerStateType { };
+	template<> struct TGpuParamSamplerStateType < false > { typedef SamplerStatePtr Type; };
+	template<> struct TGpuParamSamplerStateType < true > { typedef SPtr<SamplerStateCore> Type; };
+
+	/**
+	 * A handle that allows you to set a GpuProgram parameter. Internally keeps a reference to the GPU parameter buffer and
+	 * the necessary offsets. You should specialize this type for specific parameter types. 
+	 *
+	 * Object of this type must be returned by a Material. Setting/Getting parameter values will internally access a GPU 
+	 * parameter buffer attached to the Material this parameter was created from. Anything rendered with that material will
+	 * then use those set values.
+	 * 			
+	 * @note	
+	 * Normally you can set a GpuProgram parameter by calling various set/get methods on a Material. This class is 
+	 * primarily used as an optimization in performance critical bits of code where it is important to locate and set 
+	 * parameters quickly without any lookups (the mentioned set/get methods expect a parameter name). You just retrieve 
+	 * the handle once and then set the parameter value many times with minimal performance impact.
+	 * 
+	 * @see		Material
+	 */
+	template<class T, bool Core>
+	class BS_CORE_EXPORT TGpuDataParam
+	{
+	private:
+		template<bool Core> struct TGpuParamBufferType { };
+		template<> struct TGpuParamBufferType < false > { typedef SPtr<GpuParamBlockBuffer> Type; };
+		template<> struct TGpuParamBufferType < true > { typedef SPtr<GpuParamBlockBufferCore> Type; };
+
+		typedef typename TGpuParamBufferType<Core>::Type GpuParamBufferType;
+		typedef typename TGpuParamsPtrType<Core>::Type GpuParamsType;
+
+		/**
+		 * Policy class that allows us to re-use this template class for matrices which might need transposing, and other 
+		 * types which do not. Matrix needs to be transposed for certain render systems depending on how they store them 
+		 * in memory.
+		 */
+		template<class Type>
+		struct TransposePolicy
+		{
+			static Type transpose(const Type& value) { return value; }
+			static bool transposeEnabled(bool enabled) { return false; }
+		};
+
+		/** Transpose policy for 3x3 matrix. */
+		template<>
+		struct TransposePolicy<Matrix3>
+		{
+			static Matrix3 transpose(const Matrix3& value) { return value.transpose(); }
+			static bool transposeEnabled(bool enabled) { return enabled; }
+		};
+
+		/**	Transpose policy for 4x4 matrix. */
+		template<>
+		struct TransposePolicy<Matrix4>
+		{
+			static Matrix4 transpose(const Matrix4& value) { return value.transpose(); }
+			static bool transposeEnabled(bool enabled) { return enabled; }
+		};
+
+		/**	Transpose policy for NxM matrix. */
+		template<int N, int M>
+		struct TransposePolicy<MatrixNxM<N, M>>
+		{
+			static MatrixNxM<N, M> transpose(const MatrixNxM<N, M>& value) { return value.transpose(); }
+			static bool transposeEnabled(bool enabled) { return enabled; }
+		};
+
+	public:
+		TGpuDataParam();
+		TGpuDataParam(GpuParamDataDesc* paramDesc, const GpuParamsType& parent);
+
+		/**
+		 * Sets a parameter value at the specified array index. If parameter does not contain an array leave the index at 0.
+		 *
+		 * @note	
+		 * Like with all GPU parameters, the actual GPU buffer will not be updated until rendering with the material this 
+		 * parameter was created from starts on the core thread.
+		 */
+		void set(const T& value, UINT32 arrayIdx = 0);
+
+		/**
+		 * Returns a value of a parameter at the specified array index. If parameter does not contain an array leave the 
+		 * index at 0.
+		 *
+		 * @note	No GPU reads are done. Data returned was cached when it was written. 
+		 */
+		T get(UINT32 arrayIdx = 0);
+
+		/** Checks if param is initialized. */
+		bool operator==(const nullptr_t &nullval) const
+		{
+			return mParamDesc == nullptr;
+		}
+
+	protected:
+		GpuParamsType mParent;
+		GpuParamDataDesc* mParamDesc;
+	};
+
+	/** @copydoc TGpuDataParam */
+	template<bool Core>
+	class BS_CORE_EXPORT TGpuParamStruct
+	{
+	public:
+		template<bool Core> struct TGpuParamBufferType { };
+		template<> struct TGpuParamBufferType < false > { typedef SPtr<GpuParamBlockBuffer> Type; };
+		template<> struct TGpuParamBufferType < true > { typedef SPtr<GpuParamBlockBufferCore> Type; };
+
+		typedef typename TGpuParamBufferType<Core>::Type GpuParamBufferType;
+		typedef typename TGpuParamsPtrType<Core>::Type GpuParamsType;
+
+		TGpuParamStruct();
+		TGpuParamStruct(GpuParamDataDesc* paramDesc, const GpuParamsType& parent);
+
+		/** @copydoc TGpuDataParam::set */
+		void set(const void* value, UINT32 sizeBytes, UINT32 arrayIdx = 0);
+
+		/** @copydoc TGpuDataParam::get */
+		void get(void* value, UINT32 sizeBytes, UINT32 arrayIdx = 0);
+
+		/**	Returns the size of the struct in bytes. */
+		UINT32 getElementSize() const;
+
+		/**	Checks if param is initialized. */
+		bool operator==(const nullptr_t &nullval) const
+		{
+			return mParamDesc == nullptr;
+		}
+
+	protected:
+		GpuParamsType mParent;
+		GpuParamDataDesc* mParamDesc;
+	};
+
+	/** @copydoc TGpuDataParam */
+	template<bool Core>
+	class BS_CORE_EXPORT TGpuParamTexture
+	{
+	private:
+		friend class GpuParams;
+		friend class GpuParamsCore;
+
+		typedef typename TGpuParamsPtrType<Core>::Type GpuParamsType;
+		typedef typename TGpuParamTextureType<Core>::Type TextureType;
+
+	public:
+		TGpuParamTexture();
+		TGpuParamTexture(GpuParamObjectDesc* paramDesc, const GpuParamsType& parent);
+
+		/** @copydoc TGpuDataParam::set */
+		void set(const TextureType& texture);
+
+		/** @copydoc TGpuDataParam::get */
+		TextureType get();
+
+		/** Checks if param is initialized. */
+		bool operator==(const nullptr_t &nullval) const
+		{
+			return mParamDesc == nullptr;
+		}
+
+	protected:
+		GpuParamsType mParent;
+		GpuParamObjectDesc* mParamDesc;
+	};
+
+	/** @copydoc TGpuDataParam */
+	template<bool Core>
+	class BS_CORE_EXPORT TGpuParamLoadStoreTexture
+	{
+	private:
+		friend class GpuParams;
+		friend class GpuParamsCore;
+
+		typedef typename TGpuParamsPtrType<Core>::Type GpuParamsType;
+		typedef typename TGpuParamTextureType<Core>::Type TextureType;
+
+	public:
+		TGpuParamLoadStoreTexture();
+		TGpuParamLoadStoreTexture(GpuParamObjectDesc* paramDesc, const GpuParamsType& parent);
+
+		/** @copydoc TGpuDataParam::set */
+		void set(const TextureType& texture, const TextureSurface& surface);
+
+		/** @copydoc TGpuDataParam::get */
+		TextureType get();
+
+		/**	Checks if param is initialized. */
+		bool operator==(const nullptr_t &nullval) const
+		{
+			return mParamDesc == nullptr;
+		}
+
+	protected:
+		GpuParamsType mParent;
+		GpuParamObjectDesc* mParamDesc;
+	};
+
+	/** @copydoc TGpuDataParam */
+	template<bool Core>
+	class BS_CORE_EXPORT TGpuParamSampState
+	{
+	private:
+		friend class GpuParams;
+		friend class GpuParamsCore;
+
+		typedef typename TGpuParamsPtrType<Core>::Type GpuParamsType;
+		typedef typename TGpuParamSamplerStateType<Core>::Type SamplerStateType;
+
+	public:
+		TGpuParamSampState();
+		TGpuParamSampState(GpuParamObjectDesc* paramDesc, const GpuParamsType& parent);
+
+		/** @copydoc TGpuDataParam::set */
+		void set(const SamplerStateType& samplerState);
+
+		/** @copydoc TGpuDataParam::get */
+		SamplerStateType get();
+
+		/**	Checks if param is initialized. */
+		bool operator==(const nullptr_t &nullval) const
+		{
+			return mParamDesc == nullptr;
+		}
+
+	protected:
+		GpuParamsType mParent;
+		GpuParamObjectDesc* mParamDesc;
+	};
+
+	/** @} */
+
+	/** @addtogroup RenderAPI
+	 *  @{
+	 */
+
+	typedef TGpuDataParam<float, false> GpuParamFloat;
+	typedef TGpuDataParam<Color, false> GpuParamColor;
+	typedef TGpuDataParam<Vector2, false> GpuParamVec2;
+	typedef TGpuDataParam<Vector3, false> GpuParamVec3;
+	typedef TGpuDataParam<Vector4, false> GpuParamVec4;
+	typedef TGpuDataParam<Matrix3, false> GpuParamMat3;
+	typedef TGpuDataParam<Matrix4, false> GpuParamMat4;
+
+	typedef TGpuDataParam<float, true> GpuParamFloatCore;
+	typedef TGpuDataParam<Color, true> GpuParamColorCore;
+	typedef TGpuDataParam<Vector2, true> GpuParamVec2Core;
+	typedef TGpuDataParam<Vector3, true> GpuParamVec3Core;
+	typedef TGpuDataParam<Vector4, true> GpuParamVec4Core;
+	typedef TGpuDataParam<Matrix3, true> GpuParamMat3Core;
+	typedef TGpuDataParam<Matrix4, true> GpuParamMat4Core;
+
+	typedef TGpuParamStruct<false> GpuParamStruct;
+	typedef TGpuParamStruct<true> GpuParamStructCore;
+
+	typedef TGpuParamTexture<false> GpuParamTexture;
+	typedef TGpuParamTexture<true> GpuParamTextureCore;
+
+	typedef TGpuParamSampState<false> GpuParamSampState;
+	typedef TGpuParamSampState<true> GpuParamSampStateCore;
+
+	typedef TGpuParamLoadStoreTexture<false> GpuParamLoadStoreTexture;
+	typedef TGpuParamLoadStoreTexture<true> GpuParamLoadStoreTextureCore;
+
+	/** @} */
 }

+ 162 - 194
BansheeCore/Include/BsGpuParamBlockBuffer.h

@@ -1,195 +1,163 @@
-#pragma once
-
-#include "BsCorePrerequisites.h"
-#include "BsCoreObject.h"
-
-namespace BansheeEngine
-{
-	/**
-	 * @brief	Core thread version of a GPU param block buffer.
-	 *
-	 * @see		GpuParamBlockBuffer
-	 *
-	 * @note	Core thread only.
-	 */
-	class BS_CORE_EXPORT GpuParamBlockBufferCore : public CoreObjectCore
-	{
-	public:
-		GpuParamBlockBufferCore(UINT32 size, GpuParamBlockUsage usage);
-		virtual ~GpuParamBlockBufferCore();
-
-		/**
-		 * @brief	Writes all of the specified data to the buffer.
-		 * 			Data size must be the same size as the buffer;
-		 */
-		virtual void writeToGPU(const UINT8* data) = 0;
-
-		/**
-		 * @brief	Copies data from the internal buffer to a pre-allocated array. 
-		 * 			Be aware this generally isn't a very fast operation as reading
-		 *			from the GPU will most definitely involve a CPU-GPU sync point.
-		 *
-		 * @param [in,out]	data	Array where the data will be written to. Must be of
-		 * 							"getSize()" bytes.
-		 */
-		virtual void readFromGPU(UINT8* data) const = 0;
-
-		/**
-		 * @brief	Flushes any cached data into the actual GPU buffer.
-		 */
-		void flushToGPU();
-
-		/**
-		 * @brief	Write some data to the specified offset in the buffer. 
-		 *
-		 * @note	All values are in bytes.
-		 *			Actual hardware buffer update is delayed until rendering.
-		 */
-		void write(UINT32 offset, const void* data, UINT32 size);
-
-		/**
-		 * @brief	Read some data from the specified offset in the buffer.
-		 *			
-		 * @note	All values are in bytes.
-		 *			This reads from the cached CPU buffer. Actual hardware buffer can be read
-		 *			from the core thread.
-		 */
-		void read(UINT32 offset, void* data, UINT32 size);
-
-		/**
-		 * @brief	Clear specified section of the buffer to zero.
-		 *
-		 * @note	All values are in bytes.
-		 *			Actual hardware buffer update is delayed until rendering.
-		 */
-		void zeroOut(UINT32 offset, UINT32 size);
-
-		/**
-		 * @brief	Returns the size of the buffer in bytes.
-		 */
-		UINT32 getSize() const { return mSize; }
-
-		/**
-		 * @copydoc	HardwareBufferCoreManager::createGpuParamBlockBuffer
-		 */
-		static SPtr<GpuParamBlockBufferCore> create(UINT32 size, GpuParamBlockUsage usage = GPBU_DYNAMIC);
-
-	protected:
-		/**
-		 * @copydoc	CoreObjectCore::syncToCore
-		 */
-		virtual void syncToCore(const CoreSyncData& data)  override;
-
-		GpuParamBlockUsage mUsage;
-		UINT32 mSize;
-
-		UINT8* mCachedData;
-		bool mGPUBufferDirty;
-	};
-
-	/**
-	 * @brief	Represents a GPU parameter block buffer. Parameter block buffers
-	 *			are bound to GPU programs which then fetch parameters from those buffers.
-	 *
-	 *			Writing or reading from this buffer will translate directly to API calls
-	 *			that update the GPU.
-	 * 			
-	 * @note	Sim thread only.
-	 */
-	class BS_CORE_EXPORT GpuParamBlockBuffer : public CoreObject
-	{
-	public:
-		GpuParamBlockBuffer(UINT32 size, GpuParamBlockUsage usage);
-		virtual ~GpuParamBlockBuffer();
-
-		/**
-		 * @brief	Write some data to the specified offset in the buffer. 
-		 *
-		 * @note	All values are in bytes.
-		 *			Actual hardware buffer update is delayed until rendering.
-		 */
-		void write(UINT32 offset, const void* data, UINT32 size);
-
-		/**
-		 * @brief	Read some data from the specified offset in the buffer.
-		 *			
-		 * @note	All values are in bytes.
-		 *			This reads from the cached CPU buffer. Actual hardware buffer can be read
-		 *			from the core thread.
-		 */
-		void read(UINT32 offset, void* data, UINT32 size);
-
-		/**
-		 * @brief	Clear specified section of the buffer to zero.
-		 *
-		 * @note	All values are in bytes.
-		 *			Actual hardware buffer update is delayed until rendering.
-		 */
-		void zeroOut(UINT32 offset, UINT32 size);
-
-		/**
-		 * @brief	Returns internal cached data of the buffer.
-		 */
-		const UINT8* getCachedData() const { return mCachedData; }
-
-		/**
-		 * @brief	Returns the size of the buffer in bytes.
-		 */
-		UINT32 getSize() const { return mSize; }
-
-		/**
-		 * @brief	Retrieves a core implementation of a GPU param block buffer usable only from the
-		 *			core thread.
-		 */
-		SPtr<GpuParamBlockBufferCore> getCore() const;
-
-		/**
-		 * @copydoc	HardwareBufferManager::createGpuParamBlockBuffer
-		 */
-		static GpuParamBlockBufferPtr create(UINT32 size, GpuParamBlockUsage usage = GPBU_DYNAMIC);
-
-	protected:
-		/**
-		 * @copydoc	CoreObject::createCore
-		 */
-		SPtr<CoreObjectCore> createCore() const override;
-
-		/**
-		 * @copydoc	CoreObject::syncToCore
-		 */
-		virtual CoreSyncData syncToCore(FrameAlloc* allocator) override;
-
-		GpuParamBlockUsage mUsage;
-		UINT32 mSize;
-		UINT8* mCachedData;
-	};
-
-	/**
-	 * @brief	Implementation of a GpuParamBlock buffer that doesn't use a GPU buffer
-	 *			for storage. Used with APIs that do not support GPU parameter buffers.
-	 */
-	class BS_CORE_EXPORT GenericGpuParamBlockBufferCore : public GpuParamBlockBufferCore
-	{
-	public:
-		GenericGpuParamBlockBufferCore(UINT32 size, GpuParamBlockUsage usage);
-		~GenericGpuParamBlockBufferCore();
-
-		/**
-		 * @copydoc	GpuParamBlockBufferCore::writeData
-		 */
-		void writeToGPU(const UINT8* data) override;
-
-		/**
-		 * @copydoc GpuParamBlockBufferCore::readData.
-		 */
-		void readFromGPU(UINT8* data) const override;
-
-	protected:
-		UINT8* mData;
-
-		/**
-		 * @copydoc CoreObjectCore::initialize
-		 */
-		virtual void initialize() override;
-	};
+#pragma once
+
+#include "BsCorePrerequisites.h"
+#include "BsCoreObject.h"
+
+namespace BansheeEngine
+{
+	/** @addtogroup RenderAPI
+	 *  @{
+	 */
+
+	 /** @cond INTERNAL */
+
+	/**
+	 * Core thread version of a GpuParamBlockBuffer.
+	 *
+	 * @note	Core thread only.
+	 */
+	class BS_CORE_EXPORT GpuParamBlockBufferCore : public CoreObjectCore
+	{
+	public:
+		GpuParamBlockBufferCore(UINT32 size, GpuParamBlockUsage usage);
+		virtual ~GpuParamBlockBufferCore();
+
+		/** Writes all of the specified data to the buffer. Data size must be the same size as the buffer. */
+		virtual void writeToGPU(const UINT8* data) = 0;
+
+		/**
+		 * Copies data from the internal buffer to a pre-allocated array. Be aware this generally isn't a very fast 
+		 * operation as reading from the GPU will most definitely involve a CPU-GPU sync point.
+		 *
+		 * @param[out]	data	Array where the data will be written to. Must be getSize() bytes in size.
+		 */
+		virtual void readFromGPU(UINT8* data) const = 0;
+
+		/** Flushes any cached data into the actual GPU buffer. */
+		void flushToGPU();
+
+		/**
+		 * Write some data to the specified offset in the buffer. 
+		 *
+		 * @note	All values are in bytes. Actual hardware buffer update is delayed until rendering.
+		 */
+		void write(UINT32 offset, const void* data, UINT32 size);
+
+		/**
+		 * Read some data from the specified offset in the buffer.
+		 *			
+		 * @note	All values are in bytes. This reads from the cached CPU buffer and not directly from the GPU.
+		 */
+		void read(UINT32 offset, void* data, UINT32 size);
+
+		/**
+		 * Clear specified section of the buffer to zero.
+		 *
+		 * @note	All values are in bytes. Actual hardware buffer update is delayed until rendering.
+		 */
+		void zeroOut(UINT32 offset, UINT32 size);
+
+		/**	Returns the size of the buffer in bytes. */
+		UINT32 getSize() const { return mSize; }
+
+		/** @copydoc HardwareBufferCoreManager::createGpuParamBlockBuffer */
+		static SPtr<GpuParamBlockBufferCore> create(UINT32 size, GpuParamBlockUsage usage = GPBU_DYNAMIC);
+
+	protected:
+		/** @copydoc CoreObjectCore::syncToCore */
+		virtual void syncToCore(const CoreSyncData& data)  override;
+
+		GpuParamBlockUsage mUsage;
+		UINT32 mSize;
+
+		UINT8* mCachedData;
+		bool mGPUBufferDirty;
+	};
+
+	/**
+	 * Implementation of a GpuParamBlock buffer that doesn't use a GPU buffer for storage. Used with APIs that do not 
+	 * support GPU parameter buffers.
+	 */
+	class BS_CORE_EXPORT GenericGpuParamBlockBufferCore : public GpuParamBlockBufferCore
+	{
+	public:
+		GenericGpuParamBlockBufferCore(UINT32 size, GpuParamBlockUsage usage);
+		~GenericGpuParamBlockBufferCore();
+
+		/** @copydoc GpuParamBlockBufferCore::writeToGPU */
+		void writeToGPU(const UINT8* data) override;
+
+		/** @copydoc GpuParamBlockBufferCore::readFromGPU */
+		void readFromGPU(UINT8* data) const override;
+
+	protected:
+		UINT8* mData;
+
+		/** @copydoc CoreObjectCore::initialize */
+		virtual void initialize() override;
+	};
+
+	/** @endcond */
+
+	/**
+	 * Represents a GPU parameter block buffer. Parameter block buffers are bound to GPU programs which then fetch 
+	 * parameters from those buffers.
+	 *
+	 * Writing or reading from this buffer will translate directly to API calls that update the GPU.
+	 * 			
+	 * @note	Sim thread only.
+	 */
+	class BS_CORE_EXPORT GpuParamBlockBuffer : public CoreObject
+	{
+	public:
+		GpuParamBlockBuffer(UINT32 size, GpuParamBlockUsage usage);
+		virtual ~GpuParamBlockBuffer();
+
+		/**
+		 * Write some data to the specified offset in the buffer. 
+		 *
+		 * @note	All values are in bytes. Actual hardware buffer update is delayed until rendering.
+		 */
+		void write(UINT32 offset, const void* data, UINT32 size);
+
+		/**
+		 * Read some data from the specified offset in the buffer.
+		 *			
+		 * @note	All values are in bytes. This reads from the cached CPU buffer and not from the GPU.
+		 */
+		void read(UINT32 offset, void* data, UINT32 size);
+
+		/**
+		 * Clear specified section of the buffer to zero.
+		 *
+		 * @note	All values are in bytes. Actual hardware buffer update is delayed until rendering.
+		 */
+		void zeroOut(UINT32 offset, UINT32 size);
+
+		/** Returns internal cached data of the buffer. */
+		const UINT8* getCachedData() const { return mCachedData; }
+
+		/**	Returns the size of the buffer in bytes. */
+		UINT32 getSize() const { return mSize; }
+
+		/**	Retrieves a core implementation of a GPU param block buffer usable only from the core thread. */
+		SPtr<GpuParamBlockBufferCore> getCore() const;
+
+		/** @copydoc HardwareBufferManager::createGpuParamBlockBuffer */
+		static GpuParamBlockBufferPtr create(UINT32 size, GpuParamBlockUsage usage = GPBU_DYNAMIC);
+
+	protected:
+		/** @copydoc CoreObject::createCore */
+		SPtr<CoreObjectCore> createCore() const override;
+
+		/** @copydoc CoreObject::syncToCore */
+		virtual CoreSyncData syncToCore(FrameAlloc* allocator) override;
+
+		GpuParamBlockUsage mUsage;
+		UINT32 mSize;
+		UINT8* mCachedData;
+	};
+
+
+	/** @} */
 }

+ 54 - 57
BansheeCore/Include/BsGpuParamDesc.h

@@ -1,58 +1,55 @@
-#pragma once
-
-#include "BsCorePrerequisites.h"
-
-namespace BansheeEngine
-{
-	/**
-	 * @brief	Describes a single GPU program data (e.g. int, float, Vector2) parameter.
-	 */
-	struct GpuParamDataDesc
-	{
-		String name;
-		UINT32 elementSize; /**< In multiples of 4 bytes. */
-		UINT32 arraySize;
-		UINT32 arrayElementStride; /**< In multiples of 4 bytes. */
-		GpuParamDataType type;
-
-		UINT32 paramBlockSlot;
-		UINT32 gpuMemOffset; /**< In multiples of 4 bytes, or index for parameters not in a buffer. */
-		UINT32 cpuMemOffset; /**< In multiples of 4 bytes. */
-	};
-
-	/**
-	 * @brief	Describes a single GPU program object (e.g. texture, sampler state) parameter.
-	 */
-	struct GpuParamObjectDesc
-	{
-		String name;
-		GpuParamObjectType type;
-
-		UINT32 slot;
-	};
-
-	/**
-	 * @brief	Describes a GPU program parameter block (collection of GPU program data parameters).
-	 */
-	struct GpuParamBlockDesc
-	{
-		String name;
-		UINT32 slot;
-		UINT32 blockSize; /**< In multiples of 4 bytes. */
-		bool isShareable;
-	};
-
-	/**
-	 * @brief	Contains all parameter information for a GPU program, including data and object parameters,
-	 *			plus parameter blocks.
-	 */
-	struct GpuParamDesc
-	{
-		Map<String, GpuParamBlockDesc> paramBlocks;
-		Map<String, GpuParamDataDesc> params;
-
-		Map<String, GpuParamObjectDesc> samplers;
-		Map<String, GpuParamObjectDesc> textures;
-		Map<String, GpuParamObjectDesc> buffers;
-	};
+#pragma once
+
+#include "BsCorePrerequisites.h"
+
+namespace BansheeEngine
+{
+	/** @addtogroup RenderAPI
+	 *  @{
+	 */
+
+	/** Describes a single GPU program data (e.g. int, float, Vector2) parameter. */
+	struct GpuParamDataDesc
+	{
+		String name;
+		UINT32 elementSize; /**< In multiples of 4 bytes. */
+		UINT32 arraySize;
+		UINT32 arrayElementStride; /**< In multiples of 4 bytes. */
+		GpuParamDataType type;
+
+		UINT32 paramBlockSlot;
+		UINT32 gpuMemOffset; /**< In multiples of 4 bytes, or index for parameters not in a buffer. */
+		UINT32 cpuMemOffset; /**< In multiples of 4 bytes. */
+	};
+
+	/**	Describes a single GPU program object (e.g. texture, sampler state) parameter. */
+	struct GpuParamObjectDesc
+	{
+		String name;
+		GpuParamObjectType type;
+
+		UINT32 slot;
+	};
+
+	/**	Describes a GPU program parameter block (collection of GPU program data parameters). */
+	struct GpuParamBlockDesc
+	{
+		String name;
+		UINT32 slot;
+		UINT32 blockSize; /**< In multiples of 4 bytes. */
+		bool isShareable;
+	};
+
+	/** Contains all parameter information for a GPU program, including data and object parameters, plus parameter blocks. */
+	struct GpuParamDesc
+	{
+		Map<String, GpuParamBlockDesc> paramBlocks;
+		Map<String, GpuParamDataDesc> params;
+
+		Map<String, GpuParamObjectDesc> samplers;
+		Map<String, GpuParamObjectDesc> textures;
+		Map<String, GpuParamObjectDesc> buffers;
+	};
+
+	/** @} */
 }

+ 88 - 161
BansheeCore/Include/BsGpuParams.h

@@ -9,9 +9,12 @@
 
 namespace BansheeEngine
 {
-	/**
-	 * @brief	Stores information needed for binding a texture to the pipeline.
+	/** @addtogroup Implementation
+	 *  @{
 	 */
+	/** @cond INTERNAL */
+
+	/** Stores information needed for binding a texture to the pipeline. */
 	struct BoundTextureInfo
 	{
 		BoundTextureInfo()
@@ -22,9 +25,7 @@ namespace BansheeEngine
 		TextureSurface surface;
 	};
 
-	/**
-	 * @brief	Helper structure whose specializations convert an engine data type into a GPU program data parameter type. 
-	 */
+	/**	Helper structure whose specializations convert an engine data type into a GPU program data parameter type.  */
 	template<class T> struct TGpuDataParamInfo { };
 	template<> struct TGpuDataParamInfo < float > { enum { TypeId = GPDT_FLOAT1 }; };
 	template<> struct TGpuDataParamInfo < Vector2 > { enum { TypeId = GPDT_FLOAT2 }; };
@@ -45,21 +46,19 @@ namespace BansheeEngine
 	template<> struct TGpuDataParamInfo < Matrix4x3 > { enum { TypeId = GPDT_MATRIX_4X3 }; };
 	template<> struct TGpuDataParamInfo < Color > { enum { TypeId = GPDT_COLOR }; };
 
-	/**
-	 * @brief	Contains functionality common for both sim and core thread
-	 *			version of GpuParams.
-	 */
+	/** @endcond */
+	
+	/** Contains functionality common for both sim and core thread version of GpuParams. */
 	class BS_CORE_EXPORT GpuParamsBase
 	{
 	public:
 		/**
-		 * @brief	Creates new GpuParams object using the specified parameter descriptions.
+		 * Creates new GpuParams object using the specified parameter descriptions.
 		 *
-		 * @param	paramDesc			Reference to parameter descriptions that will be used for
-		 *								finding needed parameters.
-		 * @param	transposeMatrices	If true the stored matrices will be transposed before
-		 *								submitted to the GPU (some APIs require different
-		 *								matrix layout).
+		 * @param[in]	paramDesc			Reference to parameter descriptions that will be used for finding needed 
+		 *									parameters.
+		 * @param[in]	transposeMatrices	If true the stored matrices will be transposed before being submitted to the
+		 *									GPU (some APIs require a different matrix layout).
 		 *
 		 * @note	You normally do not want to call this manually. Instead use GpuProgram::createParameters.
 		 */
@@ -71,64 +70,42 @@ namespace BansheeEngine
 		GpuParamsBase(const GpuParamsBase& other) = delete;
 		GpuParamsBase& operator=(const GpuParamsBase& rhs) = delete;
 
-		/**
-		 * @brief	Returns a description of all stored parameters.
-		 */
+		/** Returns a description of all stored parameters. */
 		const GpuParamDesc& getParamDesc() const { return *mParamDesc; }
 
 		/**
-		 * @brief	Returns the size of a data parameter with the specified name, in bytes.
-		 *			Returns 0 if such parameter doesn't exist.
+		 * Returns the size of a data parameter with the specified name, in bytes. Returns 0 if such parameter doesn't exist.
 		 */
 		UINT32 getDataParamSize(const String& name) const;
 
-		/**
-		 * @brief	Checks if parameter with the specified name exists.
-		 */
+		/** Checks if parameter with the specified name exists. */
 		bool hasParam(const String& name) const;
 
-		/**
-		 * @brief	Checks if texture parameter with the specified name exists.
-		 */
+		/**	Checks if texture parameter with the specified name exists. */
 		bool hasTexture(const String& name) const;
 
-		/**
-		 * @brief	Checks if sampler state parameter with the specified name exists.
-		 */
+		/**	Checks if sampler state parameter with the specified name exists. */
 		bool hasSamplerState(const String& name) const;
 
-		/**
-		 * @brief	Checks if a parameter block with the specified name exists.
-		 */
+		/** Checks if a parameter block with the specified name exists. */
 		bool hasParamBlock(const String& name) const;
 
 		/**
-		 * @brief	Checks is the texture at the specified slot to be bound as
-		 *			random load/store texture instead of a normal sampled texture.
+		 * Checks if the texture at the specified slot is to be bound as a random load/store texture instead of a normal
+		 * sampled texture.
 		 */
 		bool isLoadStoreTexture(UINT32 slot) const;
 
-		/**
-		 * @brief	Changes the type of the texture at the specified slot.
-		 */
+		/** Changes the type of the texture at the specified slot. */
 		void setIsLoadStoreTexture(UINT32 slot, bool isLoadStore);
 
-		/**
-		 * @brief	Returns information that determines which texture surfaces to bind
-		 *			as load/store parameters.
-		 */
+		/** Returns information that determines which texture surfaces to bind as load/store parameters. */
 		const TextureSurface& getLoadStoreSurface(UINT32 slot) const;
 
-		/**
-		 * @brief	Sets information that determines which texture surfaces to bind
-		 *			as load/store parameters.
-		 */
+		/**	Sets information that determines which texture surfaces to bind as load/store parameters. */
 		void setLoadStoreSurface(UINT32 slot, const TextureSurface& surface) const;
 
-		/**
-		 * @brief	Checks whether matrices should be transformed before
-		 *			being written to the parameter buffer.
-		 */
+		/**	Checks whether matrices should be transposed before being written to the parameter buffer. */
 		bool getTransposeMatrices() const { return mTransposeMatrices; }
 
 		/**
@@ -146,9 +123,7 @@ namespace BansheeEngine
 		virtual void _markResourcesDirty() { }
 
 	protected:
-		/**
-		 * @brief	Gets a descriptor for a data parameter with the specified name.
-		 */
+		/**	Gets a descriptor for a data parameter with the specified name. */
 		GpuParamDataDesc* getParamDesc(const String& name) const;
 
 		GpuParamDescPtr mParamDesc;
@@ -162,10 +137,7 @@ namespace BansheeEngine
 		bool mTransposeMatrices;
 	};
 
-	/**
-	 * @brief	Templated version of GpuParams that contains functionality for both
-	 *			sim and core thread versions of stored data.
-	 */
+	/** Templated version of GpuParams that contains functionality for both sim and core thread versions of stored data. */
 	template <bool Core>
 	class BS_CORE_EXPORT TGpuParams : public GpuParamsBase
 	{
@@ -193,51 +165,46 @@ namespace BansheeEngine
 		typedef typename TTypes<Core>::SamplerType SamplerType;
 		typedef typename TTypes<Core>::ParamsBufferType ParamsBufferType;
 
-		/**
-		 * @copydoc	GpuParamsBase::GpuParamsBase(const GpuParamDescPtr&, bool)
-		 */
+		/** @copydoc GpuParamsBase::GpuParamsBase(const GpuParamDescPtr&, bool) */
 		TGpuParams(const GpuParamDescPtr& paramDesc, bool transposeMatrices);
 
 		virtual ~TGpuParams();
 
 		/**
-		 * @brief	Binds a new parameter buffer to the specified slot. Any following parameter reads or
-		 *			writes that are referencing that buffer slot will use the new buffer.
+		 * Binds a new parameter buffer to the specified slot. Any following parameter reads or writes that are referencing
+		 * that buffer slot will use the new buffer.
 		 *
-		 * @note	This is useful if you want to share a parameter buffer among multiple GPU programs. 
-		 *			You would only set the values once and then share the buffer among all other GpuParams.
-		 *
-		 *			It is up to the caller to guarantee the provided buffer matches parameter block
-		 *			descriptor for this slot.
+		 * @note	
+		 * This is useful if you want to share a parameter buffer among multiple GPU programs. You would only set the 
+		 * values once and then share the buffer among all other GpuParams.
+		 * @note
+		 * It is up to the caller to guarantee the provided buffer matches parameter block descriptor for this slot.
 		 */
 		void setParamBlockBuffer(UINT32 slot, const ParamsBufferType& paramBlockBuffer);
 
 		/**
-		 * @brief	Replaces the parameter buffer with the specified name. Any following parameter reads or
-		 *			writes that are referencing that buffer will use the new buffer.
-		 *
-		 * @note	This is useful if you want to share a parameter buffer among multiple GPU programs.
-		 *			You would only set the values once and then share the buffer among all other GpuParams.
+		 * Replaces the parameter buffer with the specified name. Any following parameter reads or writes that are 
+		 * referencing that buffer will use the new buffer.
 		 *
-		 *			It is up to the caller to guarantee the provided buffer matches parameter block
-		 *			descriptor for this slot.
+		 * @note	
+		 * This is useful if you want to share a parameter buffer among multiple GPU programs. You would only set the 
+		 * values once and then share the buffer among all other GpuParams.
+		 * @note
+		 * It is up to the caller to guarantee the provided buffer matches the descriptor of the parameter block with this name.
 		 */
 		void setParamBlockBuffer(const String& name, const ParamsBufferType& paramBlockBuffer);
 
 		/**
-		 * @brief	Returns a handle for the parameter with the specified name.
-		 *			Handle may then be stored and used for quickly setting or retrieving
-		 *			values to/from that parameter.
+		 * Returns a handle for the parameter with the specified name. Handle may then be stored and used for quickly 
+		 * setting or retrieving values to/from that parameter.
+		 *
+		 * Throws exception if parameter with that name and type doesn't exist.
 		 *
-		 *			Throws exception if parameter with that name and type doesn't exist.
-		*
-		*			Parameter handles will be invalidated when their parent GpuParams object changes.
-		*/
+		 * Parameter handles will be invalidated when their parent GpuParams object changes.
+		 */
 		template<class T> void getParam(const String& name, TGpuDataParam<T, Core>& output) const;
 
-		/**
-		 * @copydoc	getParam(const String&, TGpuDataParam<T, Core>&)
-		 */
+		/** @copydoc getParam(const String&, TGpuDataParam<T, Core>&) */
 		void getStructParam(const String& name, TGpuParamStruct<Core>& output) const;
 
 		/**
@@ -245,45 +212,29 @@ namespace BansheeEngine
 		 */
 		void getTextureParam(const String& name, TGpuParamTexture<Core>& output) const;
 
-		/**
-		 * @copydoc	getParam(const String&, TGpuDataParam<T, Core>&)
-		 */
+		/** @copydoc getParam(const String&, TGpuDataParam<T, Core>&) */
 		void getLoadStoreTextureParam(const String& name, TGpuParamLoadStoreTexture<Core>& output) const;
 
-		/**
-		 * @copydoc	getParam(const String&, TGpuDataParam<T, Core>&)
-		 */
+		/** @copydoc getParam(const String&, TGpuDataParam<T, Core>&) */
 		void getSamplerStateParam(const String& name, TGpuParamSampState<Core>& output) const;
 
-		/**
-		 * @brief	Gets a parameter block buffer from the specified slot.
-		 */
+		/**	Gets a parameter block buffer from the specified slot. */
 		ParamsBufferType getParamBlockBuffer(UINT32 slot) const;
 
-		/**
-		 * @brief	Gets a texture bound to the specified slot.
-		 */
+		/**	Gets a texture bound to the specified slot. */
 		TextureType getTexture(UINT32 slot);
 
-		/**
-		 * @brief	Gets a sampler state bound to the specified slot.
-		 */
+		/**	Gets a sampler state bound to the specified slot. */
 		SamplerType getSamplerState(UINT32 slot);
 
-		/**
-		 * @brief	Sets a texture at the specified slot.
-		 */
+		/**	Sets a texture at the specified slot. */
 		void setTexture(UINT32 slot, const TextureType& texture);
 
-		/**
-		 * @brief	Sets a sampler state at the specified slot.
-		 */
+		/**	Sets a sampler state at the specified slot. */
 		void setSamplerState(UINT32 slot, const SamplerType& sampler);
 
 	protected:
-		/**
-		 * @copydoc	CoreObject::getThisPtr
-		 */
+		/** @copydoc CoreObject::getThisPtr */
 		virtual SPtr<GpuParamsType> _getThisPtr() const = 0;
 
 		ParamsBufferType* mParamBlockBuffers;
@@ -291,11 +242,17 @@ namespace BansheeEngine
 		SamplerType* mSamplerStates;
 	};
 
+	/** @} */
+
+	/** @addtogroup RenderAPI
+	 *  @{
+	 */
+
+	/** @cond INTERNAL */
+
 	/**
 	 * @brief	Core thread version of GpuParams.
 	 *
-	 * @see		GpuParams
-	 *
 	 * @note	Core thread only.
 	 */
 	class BS_CORE_EXPORT GpuParamsCore : public CoreObjectCore, public TGpuParams<true>
@@ -303,42 +260,31 @@ namespace BansheeEngine
 	public:
 		~GpuParamsCore() { }
 
-		/**
-		 * @brief	Uploads all CPU stored parameter buffer data to the GPU buffers.
-		 */
+		/** Uploads all CPU stored parameter buffer data to the GPU buffers. */
 		void updateHardwareBuffers();
 
-		/**
-		 * @copydoc	GpuParamsBase::GpuParamsBase
-		 */
+		/** @copydoc GpuParamsBase::GpuParamsBase */
 		static SPtr<GpuParamsCore> create(const GpuParamDescPtr& paramDesc, bool transposeMatrices);
 
 	protected:
 		friend class GpuParams;
 
-		/**
-		 * @copydoc	GpuParamsBase::GpuParamsBase
-		 */
+		/** @copydoc GpuParamsBase::GpuParamsBase */
 		GpuParamsCore(const GpuParamDescPtr& paramDesc, bool transposeMatrices);
 
-		/**
-		 * @copydoc	CoreObject::getThisPtr
-		 */
+		/** @copydoc CoreObject::getThisPtr */
 		SPtr<GpuParamsCore> _getThisPtr() const override;
 
-		/**
-		 * @copydoc	CoreObjectCore::syncToCore
-		 */
+		/** @copydoc CoreObjectCore::syncToCore */
 		void syncToCore(const CoreSyncData& data) override;
 	};
 
+	/** @endcond */
+
 	/**
-	 * @brief	Contains descriptions for all parameters in a GPU program and also
-	 *			allows you to write and read those parameters. All parameter values
-	 *			are stored internally on the CPU, and are only submitted to the GPU
-	 *			once the parameters are bound to the pipeline.
-	 *
-	 * @see		CoreThreadAccessor::setConstantBuffers
+	 * Contains descriptions for all parameters in a GPU program and also allows you to write and read those parameters. 
+	 * All parameter values are stored internally on the CPU, and are only submitted to the GPU once the parameters are 
+	 * bound to the pipeline.
 	 *
 	 * @note	Sim thread only.
 	 */
@@ -361,56 +307,37 @@ namespace BansheeEngine
 		 */
 		void _markResourcesDirty() override;
 
-		/**
-		 * @brief	Retrieves a core implementation of a mesh usable only from the
-		 *			core thread.
-		 */
+		/** Retrieves a core implementation of the GPU parameters usable only from the core thread. */
 		SPtr<GpuParamsCore> getCore() const;
 
-		/**
-		 * @copydoc	GpuParamsBase::GpuParamsBase
-		 */
+		/** @copydoc GpuParamsBase::GpuParamsBase */
 		static SPtr<GpuParams> create(const GpuParamDescPtr& paramDesc, bool transposeMatrices);
 
-		/**
-		 * @brief	Contains a lookup table for sizes of all data parameters. Sizes are in bytes.
-		 */
+		/** Contains a lookup table for sizes of all data parameters. Sizes are in bytes. */
 		const static GpuDataParamInfos PARAM_SIZES;
 
 	protected:
-		/**
-		 * @copydoc	GpuParamsBase::GpuParamsBase
-		 */
+		/** @copydoc GpuParamsBase::GpuParamsBase */
 		GpuParams(const GpuParamDescPtr& paramDesc, bool transposeMatrices);
 
-		/**
-		 * @copydoc	CoreObject::getThisPtr
-		 */
+		/** @copydoc CoreObject::getThisPtr */
 		SPtr<GpuParams> _getThisPtr() const override;
 
-		/**
-		 * @copydoc	CoreObject::createCore
-		 */
+		/** @copydoc CoreObject::createCore */
 		SPtr<CoreObjectCore> createCore() const override;
 
-		/**
-		 * @copydoc	CoreObject::syncToCore
-		 */
+		/** @copydoc CoreObject::syncToCore */
 		CoreSyncData syncToCore(FrameAlloc* allocator) override;
 
-		/**
-		 * @copydoc	IResourceListener::getListenerResources
-		 */
+		/** @copydoc IResourceListener::getListenerResources */
 		void getListenerResources(Vector<HResource>& resources) override;
 
-		/**
-		 * @copydoc IResourceListener::notifyResourceLoaded
-		 */
+		/** @copydoc IResourceListener::notifyResourceLoaded */
 		void notifyResourceLoaded(const HResource& resource) override { markCoreDirty(); }
 
-		/**
-		 * @copydoc IResourceListener::notifyResourceChanged
-		 */
+		/** @copydoc IResourceListener::notifyResourceChanged */
 		void notifyResourceChanged(const HResource& resource) override { markCoreDirty(); }
 	};
+
+	/** @} */
 }

+ 243 - 282
BansheeCore/Include/BsGpuProgram.h

@@ -1,283 +1,244 @@
-#pragma once
-
-#include "BsCorePrerequisites.h"
-#include "BsDrawOps.h"
-#include "BsCoreObject.h"
-#include "BsIReflectable.h"
-#include "BsGpuParamDesc.h"
-
-namespace BansheeEngine 
-{
-	/**
-	 * @brief	Types of programs that may run on GPU.
-	 */
-	enum GpuProgramType
-	{
-		GPT_VERTEX_PROGRAM,
-		GPT_FRAGMENT_PROGRAM,
-		GPT_GEOMETRY_PROGRAM,
-		GPT_DOMAIN_PROGRAM,
-		GPT_HULL_PROGRAM,
-		GPT_COMPUTE_PROGRAM
-	};
-
-	/**
-	 * @brief	GPU program profiles representing supported
-	 *			feature sets.
-	 */
-	enum GpuProgramProfile
-	{
-		GPP_NONE,
-		GPP_FS_1_1,
-		GPP_FS_1_2,
-		GPP_FS_1_3,
-		GPP_FS_1_4,
-		GPP_FS_2_0,
-		GPP_FS_2_x,
-		GPP_FS_2_a,
-		GPP_FS_2_b,
-		GPP_FS_3_0,
-		GPP_FS_3_x,
-		GPP_FS_4_0,
-		GPP_FS_4_1,
-		GPP_FS_5_0,
-		GPP_VS_1_1,
-		GPP_VS_2_0,
-		GPP_VS_2_x,
-		GPP_VS_2_a,
-		GPP_VS_3_0,
-		GPP_VS_4_0,
-		GPP_VS_4_1,
-		GPP_VS_5_0,
-		GPP_GS_4_0,
-		GPP_GS_4_1,
-		GPP_GS_5_0,
-		GPP_HS_5_0,
-		GPP_DS_5_0,
-		GPP_CS_5_0
-	};
-
-	/**
-	 * @brief	Data describing a GpuProgram.
-	 */
-	class BS_CORE_EXPORT GpuProgramProperties
-	{
-	public:
-		GpuProgramProperties(const String& source, const String& entryPoint,
-			GpuProgramType gptype, GpuProgramProfile profile);
-
-		virtual ~GpuProgramProperties() { }
-
-        /**
-         * @brief	Source used for creating this program.
-         */
-        const String& getSource() const { return mSource; }
-        
-		/**
-		 * @brief	Type of GPU program (e.g. fragment, vertex)
-		 */
-        GpuProgramType getType() const { return mType; }
-
-		/**
-		 * @brief	Profile of the GPU program (e.g. VS_4_0, VS_5_0)
-		 */
-		GpuProgramProfile getProfile() const { return mProfile; }
-
-		/**
-		 * @brief	Name of the program entry method (e.g. "main")
-		 */
-		const String& getEntryPoint() const { return mEntryPoint; }
-
-	protected:
-		friend class GpuProgramRTTI;
-
-		GpuProgramType mType;
-		String mEntryPoint;
-		GpuProgramProfile mProfile;
-		String mSource;
-	};
-
-	/**
-	 * @brief	Core thread version of a GpuProgram.
-	 *
-	 * @see	GpuProgram
-	 *
-	 * @note	Core thread only.
-	 */
-	class BS_CORE_EXPORT GpuProgramCore : public CoreObjectCore
-	{
-	public:
-		virtual ~GpuProgramCore() { }
-
-		/**
-		 * @brief	Returns whether this program can be supported on the current renderer and hardware.
-		 */
-        virtual bool isSupported() const;
-
-		/**
-		 * @brief	Returns true if shader was successfully compiled. 
-		 */
-		virtual bool isCompiled() const { return mIsCompiled; }
-
-		/**
-		 * @brief	Returns an error message returned by the compiler, if the compilation failed.
-		 */
-		virtual String getCompileErrorMessage() const { return mCompileError; }
-
-		/**
-		 * @brief	Sets whether this geometry program requires adjacency information
-		 *			from the input primitives.
-		 *
-		 * @note	Only relevant for geometry programs.
-		 */
-		virtual void setAdjacencyInfoRequired(bool required) { mNeedsAdjacencyInfo = required; }
-
-		/**
-		 * @brief	Returns whether this geometry program requires adjacency information
-		 *			from the input primitives.
-		 *
-		 * @note	Only relevant for geometry programs.
-		 */
-		virtual bool isAdjacencyInfoRequired() const { return mNeedsAdjacencyInfo; }
-
-		/**
-		 * @copydoc	GpuProgram::createParameters
-		 */
-		virtual SPtr<GpuParamsCore> createParameters();
-
-		/**
-		 * @copydoc	GpuProgram::getParamDesc
-		 */
-		GpuParamDescPtr getParamDesc() const { return mParametersDesc; }
-
-		/**
-		 * @brief	Returns GPU program input declaration. Only relevant for vertex programs.
-		 */
-		SPtr<VertexDeclarationCore> getInputDeclaration() const { return mInputDeclaration; }
-
-		/**
-		 * @brief	Returns properties that contain information about the GPU program.
-		 */
-		const GpuProgramProperties& getProperties() const { return mProperties; }
-
-		/**
-		 * @copydoc	GpuProgram::create
-		 */
-		static SPtr<GpuProgramCore> create(const String& source, const String& entryPoint, const String& language, GpuProgramType gptype,
-			GpuProgramProfile profile, bool requiresAdjacency = false);
-
-	protected:
-		GpuProgramCore(const String& source, const String& entryPoint,
-			GpuProgramType gptype, GpuProgramProfile profile, bool isAdjacencyInfoRequired = false);
-
-		/**
-		 * @brief	Returns whether required capabilities for this program is supported.
-		 */
-		bool isRequiredCapabilitiesSupported() const;
-
-		bool mNeedsAdjacencyInfo;
-
-		bool mIsCompiled;
-		String mCompileError;
-
-		GpuParamDescPtr mParametersDesc;
-		SPtr<VertexDeclarationCore> mInputDeclaration;
-		GpuProgramProperties mProperties;
-	};
-
-	/**
-	 * @brief	Contains a GPU program such as vertex or fragment program which gets
-	 *			compiled from the provided source code.
-	 *
-	 * @note	Sim thread only.
-	 */
-	class BS_CORE_EXPORT GpuProgram : public IReflectable, public CoreObject
-	{
-	public:
-		virtual ~GpuProgram() { }
-
-		/**
-		 * @brief	Returns true if shader was successfully compiled. 
-		 *
-		 * @note	Only valid after core thread has initialized the program.
-		 */
-		bool isCompiled() const;
-
-		/**
-		 * @brief	Returns an error message returned by the compiler, if the compilation failed.
-		 *
-		 * @note	Only valid after core thread has initialized the program.
-		 */
-		String getCompileErrorMessage() const;
-
-		/**
-		 * @brief	Creates a new parameters object compatible with this program definition. You
-		 *			may populate the returned object with actual parameter values and bind it
-		 *			to the pipeline to render an object using those values and this program.
-		 *
-		 * @note	Only valid after core thread has initialized the program.
-		 */
-		GpuParamsPtr createParameters();
-
-		/**
-		 * @brief	Returns description of all parameters in this GPU program.
-		 *
-		 * @note	Only valid after core thread has initialized the program.
-		 */
-		GpuParamDescPtr getParamDesc() const;
-
-		/**
-		 * @brief	Retrieves a core implementation of a gpu program usable only from the
-		 *			core thread.
-		 */
-		SPtr<GpuProgramCore> getCore() const;
-
-		/**
-		 * @brief	Returns properties that contain information about the GPU program.
-		 */
-		const GpuProgramProperties& getProperties() const { return mProperties; }
-
-		/**
-		 * @brief	Creates a new GPU program using the provided source code. If compilation fails or program is not supported
-		 *			"isCompiled" with return false, and you will be able to retrieve the error message via "getCompileErrorMessage".
-		 *
-		 * @param	source		Source code to compile the shader from.
-		 * @param	entryPoint	Name of the entry point function, e.g. "main".
-		 * @param	language	Language the source is written in, e.g. "hlsl" or "glsl".
-		 * @param	gptype		Type of the program, e.g. vertex or fragment.
-		 * @param	profile		Program profile specifying supported feature-set. Must match the type.
-		 * @param	requiresAdjacency	If true then adjacency information will be provided when rendering using this program.
-		 */
-		static GpuProgramPtr create(const String& source, const String& entryPoint, const String& language, GpuProgramType gptype,
-			GpuProgramProfile profile, bool requiresAdjacency = false);
-
-	protected:
-		friend class GpuProgramManager;
-
-		GpuProgram(const String& source, const String& entryPoint, const String& language,
-			GpuProgramType gptype, GpuProgramProfile profile, bool isAdjacencyInfoRequired = false);
-
-		/**
-		 * @copydoc	CoreObject::createCore
-		 */
-		SPtr<CoreObjectCore> createCore() const;
-
-		/**
-		 * @copydoc Resource::calculateSize
-		 */
-		size_t calculateSize() const { return 0; } // TODO 
-
-	protected:
-		bool mNeedsAdjacencyInfo;
-		String mLanguage;
-		GpuProgramProperties mProperties;
-
-		/************************************************************************/
-		/* 								SERIALIZATION                      		*/
-		/************************************************************************/
-	public:
-		friend class GpuProgramRTTI;
-		static RTTITypeBase* getRTTIStatic();
-		virtual RTTITypeBase* getRTTI() const;
-	};
+#pragma once
+
+#include "BsCorePrerequisites.h"
+#include "BsCoreObject.h"
+#include "BsIReflectable.h"
+
+namespace BansheeEngine 
+{
+	/** @addtogroup RenderAPI
+	 *  @{
+	 */
+
+	/** Types of programs that may run on GPU. */
+	enum GpuProgramType
+	{
+		GPT_VERTEX_PROGRAM,
+		GPT_FRAGMENT_PROGRAM,
+		GPT_GEOMETRY_PROGRAM,
+		GPT_DOMAIN_PROGRAM,
+		GPT_HULL_PROGRAM,
+		GPT_COMPUTE_PROGRAM
+	};
+
+	/**	GPU program profiles representing supported feature sets. */
+	enum GpuProgramProfile
+	{
+		GPP_NONE,
+		GPP_FS_1_1,
+		GPP_FS_1_2,
+		GPP_FS_1_3,
+		GPP_FS_1_4,
+		GPP_FS_2_0,
+		GPP_FS_2_x,
+		GPP_FS_2_a,
+		GPP_FS_2_b,
+		GPP_FS_3_0,
+		GPP_FS_3_x,
+		GPP_FS_4_0,
+		GPP_FS_4_1,
+		GPP_FS_5_0,
+		GPP_VS_1_1,
+		GPP_VS_2_0,
+		GPP_VS_2_x,
+		GPP_VS_2_a,
+		GPP_VS_3_0,
+		GPP_VS_4_0,
+		GPP_VS_4_1,
+		GPP_VS_5_0,
+		GPP_GS_4_0,
+		GPP_GS_4_1,
+		GPP_GS_5_0,
+		GPP_HS_5_0,
+		GPP_DS_5_0,
+		GPP_CS_5_0
+	};
+
+	/** Data describing a GpuProgram. */
+	class BS_CORE_EXPORT GpuProgramProperties
+	{
+	public:
+		GpuProgramProperties(const String& source, const String& entryPoint,
+			GpuProgramType gptype, GpuProgramProfile profile);
+
+		virtual ~GpuProgramProperties() { }
+
+        /** Source used for creating this program. */
+        const String& getSource() const { return mSource; }
+        
+		/**	Type of GPU program (e.g. fragment, vertex). */
+        GpuProgramType getType() const { return mType; }
+
+		/**	Profile of the GPU program (e.g. VS_4_0, VS_5_0). */
+		GpuProgramProfile getProfile() const { return mProfile; }
+
+		/**	Name of the program entry method (e.g. "main"). */
+		const String& getEntryPoint() const { return mEntryPoint; }
+
+	protected:
+		friend class GpuProgramRTTI;
+
+		GpuProgramType mType;
+		String mEntryPoint;
+		GpuProgramProfile mProfile;
+		String mSource;
+	};
+
+	/** @cond INTERNAL */
+
+	/**
+	 * Core thread version of a GpuProgram.
+	 *
+	 * @note	Core thread only.
+	 */
+	class BS_CORE_EXPORT GpuProgramCore : public CoreObjectCore
+	{
+	public:
+		virtual ~GpuProgramCore() { }
+
+		/** Returns whether this program can be supported on the current renderer and hardware. */
+        virtual bool isSupported() const;
+
+		/** Returns true if shader was successfully compiled. */
+		virtual bool isCompiled() const { return mIsCompiled; }
+
+		/**	Returns an error message returned by the compiler, if the compilation failed. */
+		virtual String getCompileErrorMessage() const { return mCompileError; }
+
+		/**
+		 * Sets whether this geometry program requires adjacency information from the input primitives.
+		 *
+		 * @note	Only relevant for geometry programs.
+		 */
+		virtual void setAdjacencyInfoRequired(bool required) { mNeedsAdjacencyInfo = required; }
+
+		/**
+		 * Returns whether this geometry program requires adjacency information from the input primitives.
+		 *
+		 * @note	Only relevant for geometry programs.
+		 */
+		virtual bool isAdjacencyInfoRequired() const { return mNeedsAdjacencyInfo; }
+
+		/** @copydoc GpuProgram::createParameters */
+		virtual SPtr<GpuParamsCore> createParameters();
+
+		/** @copydoc GpuProgram::getParamDesc */
+		GpuParamDescPtr getParamDesc() const { return mParametersDesc; }
+
+		/**	Returns GPU program input declaration. Only relevant for vertex programs. */
+		SPtr<VertexDeclarationCore> getInputDeclaration() const { return mInputDeclaration; }
+
+		/**	Returns properties that contain information about the GPU program. */
+		const GpuProgramProperties& getProperties() const { return mProperties; }
+
+		/** @copydoc GpuProgram::create */
+		static SPtr<GpuProgramCore> create(const String& source, const String& entryPoint, const String& language, GpuProgramType gptype,
+			GpuProgramProfile profile, bool requiresAdjacency = false);
+
+	protected:
+		GpuProgramCore(const String& source, const String& entryPoint,
+			GpuProgramType gptype, GpuProgramProfile profile, bool isAdjacencyInfoRequired = false);
+
+		/** Returns whether required capabilities for this program are supported. */
+		bool isRequiredCapabilitiesSupported() const;
+
+		bool mNeedsAdjacencyInfo;
+
+		bool mIsCompiled;
+		String mCompileError;
+
+		GpuParamDescPtr mParametersDesc;
+		SPtr<VertexDeclarationCore> mInputDeclaration;
+		GpuProgramProperties mProperties;
+	};
+
+	/** @endcond */
+
+	/**
+	 * Contains a GPU program such as vertex or fragment program which gets compiled from the provided source code.
+	 *
+	 * @note	Sim thread only.
+	 */
+	class BS_CORE_EXPORT GpuProgram : public IReflectable, public CoreObject
+	{
+	public:
+		virtual ~GpuProgram() { }
+
+		/**
+		 * Returns true if shader was successfully compiled. 
+		 *
+		 * @note	Only valid after core thread has initialized the program.
+		 */
+		bool isCompiled() const;
+
+		/**
+		 * Returns an error message returned by the compiler, if the compilation failed.
+		 *
+		 * @note	Only valid after core thread has initialized the program.
+		 */
+		String getCompileErrorMessage() const;
+
+		/**
+		 * Creates a new parameters object compatible with this program definition. You may populate the returned object 
+		 * with actual parameter values and bind it to the pipeline to render an object using those values and this program.
+		 *
+		 * @note	Only valid after core thread has initialized the program.
+		 */
+		GpuParamsPtr createParameters();
+
+		/**
+		 * Returns description of all parameters in this GPU program.
+		 *
+		 * @note	Only valid after core thread has initialized the program.
+		 */
+		GpuParamDescPtr getParamDesc() const;
+
+		/** Retrieves a core implementation of a GPU program usable only from the core thread. */
+		SPtr<GpuProgramCore> getCore() const;
+
+		/** Returns properties that contain information about the GPU program. */
+		const GpuProgramProperties& getProperties() const { return mProperties; }
+
+		/**
+		 * Creates a new GPU program using the provided source code. If compilation fails or the program is not supported,
+		 * isCompiled() will return false, and you will be able to retrieve the error message via getCompileErrorMessage().
+		 *
+		 * @param[in]	source				Source code to compile the shader from.
+		 * @param[in]	entryPoint			Name of the entry point function, e.g. "main".
+		 * @param[in]	language			Language the source is written in, e.g. "hlsl" or "glsl".
+		 * @param[in]	gptype				Type of the program, e.g. vertex or fragment.
+		 * @param[in]	profile				Program profile specifying supported feature-set. Must match the type.
+		 * @param[in]	requiresAdjacency	If true then adjacency information will be provided when rendering using this 
+		 *									program.
+		 */
+		static GpuProgramPtr create(const String& source, const String& entryPoint, const String& language, GpuProgramType gptype,
+			GpuProgramProfile profile, bool requiresAdjacency = false);
+
+	protected:
+		friend class GpuProgramManager;
+
+		GpuProgram(const String& source, const String& entryPoint, const String& language,
+			GpuProgramType gptype, GpuProgramProfile profile, bool isAdjacencyInfoRequired = false);
+
+		/** @copydoc CoreObject::createCore */
+		SPtr<CoreObjectCore> createCore() const;
+
+		/** @copydoc Resource::calculateSize */
+		size_t calculateSize() const { return 0; } // TODO 
+
+	protected:
+		bool mNeedsAdjacencyInfo;
+		String mLanguage;
+		GpuProgramProperties mProperties;
+
+		/************************************************************************/
+		/* 								SERIALIZATION                      		*/
+		/************************************************************************/
+	public:
+		friend class GpuProgramRTTI;
+		static RTTITypeBase* getRTTIStatic();
+		RTTITypeBase* getRTTI() const override;
+	};
+
+	/** @} */
 }

+ 141 - 143
BansheeCore/Include/BsGpuProgramManager.h

@@ -1,144 +1,142 @@
-#pragma once
-
-#include "BsCorePrerequisites.h"
-#include "BsModule.h"
-#include "BsException.h"
-#include "BsGpuProgram.h"
-
-namespace BansheeEngine 
-{
-	/**
-	 * @brief	Factory responsible for creating GPU programs of a certain type.
-	 */
-	class BS_CORE_EXPORT GpuProgramFactory
-	{
-	public:
-        GpuProgramFactory() {}
-		virtual ~GpuProgramFactory() { }
-
-		/**
-		 * @brief	Returns GPU program language this factory is capable creating GPU programs from.
-		 */
-		virtual const String& getLanguage() const = 0;
-
-		/**
-		 * @brief	Creates a new GPU program using the provided source code. If compilation fails or program is not supported
-		 *			"isCompiled" method on the returned program will return false, and you will be able to retrieve the error message 
-		 *			via "getCompileErrorMessage".
-		 *
-		 * @param	source		Source code to compile the shader from.
-		 * @param	entryPoint	Name of the entry point function, e.g. "main".
-		 * @param	gptype		Type of the program, e.g. vertex or fragment.
-		 * @param	profile		Program profile specifying supported feature-set. Must match the type.
-		 * @param	requiresAdjacency	If true then adjacency information will be provided when rendering using this program.
-		 */
-		virtual SPtr<GpuProgramCore> create(const String& source, const String& entryPoint, GpuProgramType gptype, 
-			GpuProgramProfile profile, bool requiresAdjacencyInformation) = 0;
-
-		/**
-		 * @copydoc	GpuProgramManager::createEmpty
-		 */
-		virtual SPtr<GpuProgramCore> create(GpuProgramType type) = 0;
-	};
-
-	/**
-	 * @brief	Manager responsible for creating GPU programs. It will automatically
-	 *			try to find the appropriate handler for a specific GPU program language
-	 *			and create the program if possible.
-	 *
-	 * @note	Sim thread only.
-	 */
-	class BS_CORE_EXPORT GpuProgramManager : public Module<GpuProgramManager>
-	{
-	public:
-		/**
-		 * @brief	Creates a new GPU program using the provided source code. If compilation fails or program is not supported
-		 *			"isCompiled" method on the returned program will return false, and you will be able to retrieve the error message 
-		 *			via "getCompileErrorMessage".
-		 *
-		 * @param	source		Source code to compile the shader from.
-		 * @param	entryPoint	Name of the entry point function, e.g. "main".
-		 * @param	language	Language the source is written in, e.g. "hlsl" or "glsl".
-		 * @param	gptype		Type of the program, e.g. vertex or fragment.
-		 * @param	profile		Program profile specifying supported feature-set. Must match the type.
-		 * @param	requiresAdjacency	If true then adjacency information will be provided when rendering using this program.
-		 */
-		GpuProgramPtr create(const String& source, const String& entryPoint, const String& language, 
-			GpuProgramType gptype, GpuProgramProfile profile, bool requiresAdjacency = false);
-
-		/**
-		 * @brief	Creates a completely empty and uninitialized GpuProgram.
-		 * 			Should only be used for specific purposes, like deserialization,
-		 * 			as it requires additional manual initialization that is not required normally.
-		 */
-		GpuProgramPtr createEmpty(const String& language, GpuProgramType type);
-	};
-
-	/**
-	 * @brief	Manager responsible for creating GPU programs. It will automatically
-	 *			try to find the appropriate handler for a specific GPU program language
-	 *			and create the program if possible.
-	 *
-	 * @note	Core thread only.
-	 */
-	class BS_CORE_EXPORT GpuProgramCoreManager : public Module<GpuProgramCoreManager>
-	{
-	public:
-		GpuProgramCoreManager();
-		virtual ~GpuProgramCoreManager();
-
-		/**
-		 * @brief	Registers a new factory that is able to create GPU programs for a certain language.
-		 *			If any other factory for the same language exists, it will overwrite it.
-		 */
-		void addFactory(GpuProgramFactory* factory);
-
-		/**
-		 * @brief	Unregisters a GPU program factory, essentially making it not possible to create GPU programs
-		 *			using the language the factory supported.
-		 */
-		void removeFactory(GpuProgramFactory* factory);
-
-		/**
-		 * @brief	Query if a GPU program language is supported. (.e.g. "hlsl", "glsl").
-		 */
-		bool isLanguageSupported(const String& lang);
-
-		/**
-		 * @brief	Creates a new GPU program using the provided source code. If compilation fails or program is not supported
-		 *			"isCompiled" method on the returned program will return false, and you will be able to retrieve the error message 
-		 *			via "getCompileErrorMessage".
-		 *
-		 * @param	source		Source code to compile the shader from.
-		 * @param	entryPoint	Name of the entry point function, e.g. "main".
-		 * @param	language	Language the source is written in, e.g. "hlsl" or "glsl".
-		 * @param	gptype		Type of the program, e.g. vertex or fragment.
-		 * @param	profile		Program profile specifying supported feature-set. Must match the type.
-		 * @param	requiresAdjacency	If true then adjacency information will be provided when rendering using this program.
-		 */
-		SPtr<GpuProgramCore> create(const String& source, const String& entryPoint, const String& language, 
-			GpuProgramType gptype, GpuProgramProfile profile, bool requiresAdjacency = false);
-
-	protected:
-		friend class GpuProgram;
-
-		/**
-		 * @brief	Creates a GPU program without initializing it.
-		 *
-		 * @see		create
-		 */
-		SPtr<GpuProgramCore> createInternal(const String& source, const String& entryPoint, const String& language,
-			GpuProgramType gptype, GpuProgramProfile profile, bool requiresAdjacency = false);
-
-		/**
-		 * @brief	Attempts to find a factory for the specified language. Returns null if it cannot find one.
-		 */
-		GpuProgramFactory* getFactory(const String& language);
-
-	protected:
-		typedef Map<String, GpuProgramFactory*> FactoryMap;
-
-		FactoryMap mFactories;
-		GpuProgramFactory* mNullFactory; /**< Factory for dealing with GPU programs that can't be created. */
-	};
+#pragma once
+
+#include "BsCorePrerequisites.h"
+#include "BsModule.h"
+#include "BsException.h"
+#include "BsGpuProgram.h"
+
+namespace BansheeEngine 
+{
+	/** @cond INTERNAL */
+	/** @addtogroup RenderAPI
+	 *  @{
+	 */
+
+	/** Factory responsible for creating GPU programs of a certain type. */
+	class BS_CORE_EXPORT GpuProgramFactory
+	{
+	public:
+        GpuProgramFactory() {}
+		virtual ~GpuProgramFactory() { }
+
+		/**	Returns the GPU program language this factory is capable of creating GPU programs from. */
+		virtual const String& getLanguage() const = 0;
+
+		/**
+		 * Creates a new GPU program using the provided source code. If compilation fails or program is not supported
+		 * GpuProgramCore::isCompiled() method on the returned program will return false, and you will be able to 
+		 * retrieve the error message via GpuProgramCore::getCompileErrorMessage().
+		 *
+		 * @param[in]	source				Source code to compile the shader from.
+		 * @param[in]	entryPoint			Name of the entry point function, e.g. "main".
+		 * @param[in]	gptype				Type of the program, e.g. vertex or fragment.
+		 * @param[in]	profile				Program profile specifying supported feature-set. Must match the type.
+		 * @param[in]	requiresAdjacencyInformation	If true then adjacency information will be provided when 
+		 *												rendering using this program.
+		 */
+		virtual SPtr<GpuProgramCore> create(const String& source, const String& entryPoint, GpuProgramType gptype, 
+			GpuProgramProfile profile, bool requiresAdjacencyInformation) = 0;
+
+		/** @copydoc GpuProgramManager::createEmpty */
+		virtual SPtr<GpuProgramCore> create(GpuProgramType type) = 0;
+	};
+
+	/**
+	 * Manager responsible for creating GPU programs. It will automatically try to find the appropriate handler for a 
+	 * specific GPU program language and create the program if possible.
+	 *
+	 * @note	Sim thread only.
+	 */
+	class BS_CORE_EXPORT GpuProgramManager : public Module<GpuProgramManager>
+	{
+	public:
+		/**
+		 * Creates a new GPU program using the provided source code. If compilation fails or program is not supported
+		 * GpuProgram::isCompiled() method on the returned program will return false, and you will be able to retrieve the 
+		 * error message via GpuProgram::getCompileErrorMessage().
+		 *
+		 * @param[in]	source				Source code to compile the shader from.
+		 * @param[in]	entryPoint			Name of the entry point function, e.g. "main".
+		 * @param[in]	language			Language the source is written in, e.g. "hlsl" or "glsl".
+		 * @param[in]	gptype				Type of the program, e.g. vertex or fragment.
+		 * @param[in]	profile				Program profile specifying supported feature-set. Must match the type.
+		 * @param[in]	requiresAdjacency	If true then adjacency information will be provided when rendering using this 
+		 *									program.
+		 */
+		GpuProgramPtr create(const String& source, const String& entryPoint, const String& language, 
+			GpuProgramType gptype, GpuProgramProfile profile, bool requiresAdjacency = false);
+
+		/**
+		 * Creates a completely empty and uninitialized GpuProgram. Should only be used for specific purposes, like 
+		 * deserialization, as it requires additional manual initialization that is not required normally.
+		 */
+		GpuProgramPtr createEmpty(const String& language, GpuProgramType type);
+	};
+
+	/**
+	 * Manager responsible for creating GPU programs. It will automatically try to find the appropriate handler for a 
+	 * specific GPU program language and create the program if possible.
+	 *
+	 * @note	Core thread only.
+	 */
+	class BS_CORE_EXPORT GpuProgramCoreManager : public Module<GpuProgramCoreManager>
+	{
+	public:
+		GpuProgramCoreManager();
+		virtual ~GpuProgramCoreManager();
+
+		/**
+		 * Registers a new factory that is able to create GPU programs for a certain language. If any other factory for the
+		 * same language exists, it will overwrite it.
+		 */
+		void addFactory(GpuProgramFactory* factory);
+
+		/**
+		 * Unregisters a GPU program factory, essentially making it not possible to create GPU programs using the language 
+		 * the factory supported.
+		 */
+		void removeFactory(GpuProgramFactory* factory);
+
+		/** Queries whether a GPU program language is supported (e.g. "hlsl", "glsl"). */
+		bool isLanguageSupported(const String& lang);
+
+		/**
+		 * Creates a new GPU program using the provided source code. If compilation fails or program is not supported
+		 * GpuProgramCore::isCompiled() method on the returned program will return false, and you will be able to retrieve 
+		 * the error message via GpuProgramCore::getCompileErrorMessage().
+		 *
+		 * @param[in]	source				Source code to compile the shader from.
+		 * @param[in]	entryPoint			Name of the entry point function, e.g. "main".
+		 * @param[in]	language			Language the source is written in, e.g. "hlsl" or "glsl".
+		 * @param[in]	gptype				Type of the program, e.g. vertex or fragment.
+		 * @param[in]	profile				Program profile specifying supported feature-set. Must match the type.
+		 * @param[in]	requiresAdjacency	If true then adjacency information will be provided when rendering using this 
+		 *									program.
+		 */
+		SPtr<GpuProgramCore> create(const String& source, const String& entryPoint, const String& language, 
+			GpuProgramType gptype, GpuProgramProfile profile, bool requiresAdjacency = false);
+
+	protected:
+		friend class GpuProgram;
+
+		/**
+		 * Creates a GPU program without initializing it.
+		 *
+		 * @see		create
+		 */
+		SPtr<GpuProgramCore> createInternal(const String& source, const String& entryPoint, const String& language,
+			GpuProgramType gptype, GpuProgramProfile profile, bool requiresAdjacency = false);
+
+		/** Attempts to find a factory for the specified language. Returns null if it cannot find one. */
+		GpuProgramFactory* getFactory(const String& language);
+
+	protected:
+		typedef Map<String, GpuProgramFactory*> FactoryMap;
+
+		FactoryMap mFactories;
+		GpuProgramFactory* mNullFactory; /**< Factory for dealing with GPU programs that can't be created. */
+	};
+
+	/** @} */
+	/** @endcond */
 }

+ 160 - 173
BansheeCore/Include/BsHardwareBuffer.h

@@ -1,173 +1,160 @@
-#pragma once
-
-#include "BsCorePrerequisites.h"
-
-namespace BansheeEngine 
-{
-	/**
-	 * @brief	Abstract class defining common features of hardware buffers. Hardware buffers usually
-	 *			represent areas of memory the GPU or the driver can access directly.
-	 *
-	 * @note	Be aware that reading from non-system memory hardware buffers is usually slow and should be avoided.
-	 */
-	class BS_CORE_EXPORT HardwareBuffer
-    {
-    public:
-        virtual ~HardwareBuffer() {}
-
-		/**
-		 * @brief	Locks a portion of the buffer and returns pointer to the locked area.
-		 *			You must call "unlock" when done.
-		 *
-		 * @param	offset	Offset in bytes from which to lock the buffer.
-		 * @param	length	Length of the area you want to lock, in bytes.
-		 * @param	options	Signifies what you want to do with the returned pointer.
-		 *					Caller must ensure not to do anything he hasn't requested.
-		 *					(e.g. don't try to read from the buffer unless you requested
-		 *					it here).
-		 */
-		virtual void* lock(UINT32 offset, UINT32 length, GpuLockOptions options)
-        {
-            assert(!isLocked() && "Cannot lock this buffer, it is already locked!");
-            void* ret = lockImpl(offset, length, options);
-            mIsLocked = true;
-
-			mLockStart = offset;
-			mLockSize = length;
-            return ret;
-        }
-
-		/**
-		 * @brief	Locks the entire buffer and returns pointer to the locked area.
-		 *			You must call "unlock" when done.
-		 *
-		 * @param	options	Signifies what you want to do with the returned pointer.
-		 *					Caller must ensure not to do anything he hasn't requested.
-		 *					(e.g. don't try to read from the buffer unless you requested
-		 *					it here).
-		 */
-        void* lock(GpuLockOptions options)
-        {
-            return this->lock(0, mSizeInBytes, options);
-        }
-
-		/**
-		 * @brief	Releases the lock on this buffer. 
-		 */
-		virtual void unlock()
-        {
-            assert(isLocked() && "Cannot unlock this buffer, it is not locked!");
-
-            unlockImpl();
-            mIsLocked = false;
-        }
-
-		/**
-		 * @brief	Reads data from a portion of the buffer and copies it to the destination
-		 *			buffer. Caller must ensure destination buffer is large enough.
-		 *
-		 * @param	offset	Offset in bytes from which to copy the data.
-		 * @param	length	Length of the area you want to copy, in bytes.
-		 * @param	dest	Destination buffer large enough to store the read data.
-		 */
-        virtual void readData(UINT32 offset, UINT32 length, void* dest) = 0;
-
-		/**
-		 * @brief	Writes data into a portion of the buffer from the source memory. 
-		 *
-		 * @param	offset		Offset in bytes from which to copy the data.
-		 * @param	length		Length of the area you want to copy, in bytes.
-		 * @param	source		Source buffer containing the data to write.
-		 * @param	writeFlags	Optional write flags that may affect performance.
-		 */
-        virtual void writeData(UINT32 offset, UINT32 length, const void* source,
-				BufferWriteType writeFlags = BufferWriteType::Normal) = 0;
-
-		/**
-		 * @brief	Copies data from a specific portion of the source buffer into a specific portion
-		 *			of this buffer.
-		 *
-		 * @param	srcBuffer			Buffer to copy from.
-		 * @param	srcOffset			Offset into the source buffer to start copying from, in bytes.
-		 * @param	dstOffset			Offset into this buffer to start copying to, in bytes.
-		 * @param	length				Size of the data to copy, in bytes.
-		 * @param	discardWholeBuffer	Specify true if the data in the current buffer can be entirely discarded. This
-		 *								may improve performance.
-		 */
-		virtual void copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset, 
-			UINT32 dstOffset, UINT32 length, bool discardWholeBuffer = false)
-		{
-			const void *srcData = srcBuffer.lock(
-				srcOffset, length, GBL_READ_ONLY);
-			this->writeData(dstOffset, length, srcData, discardWholeBuffer ? BufferWriteType::Discard : BufferWriteType::Normal);
-			srcBuffer.unlock();
-		}
-
-		/**
-		 * @brief	Copy data from the provided buffer into this buffer. If buffers
-		 *			are not the same size, smaller size will be used.
-		 */
-		virtual void copyData(HardwareBuffer& srcBuffer)
-		{
-			UINT32 sz = std::min(getSizeInBytes(), srcBuffer.getSizeInBytes()); 
-			copyData(srcBuffer, 0, 0, sz, true);
-		}
-			
-		/**
-		 * @brief	Returns the size of this buffer in bytes.
-		 */
-        UINT32 getSizeInBytes(void) const { return mSizeInBytes; }
-
-		/**
-		 * @brief	Returns the Usage flags with which this buffer was created.
-		 */
-        GpuBufferUsage getUsage() const { return mUsage; }
-
-		/**
-		 * @brief	Returns whether this buffer is held in system memory.
-		 */
-		bool isSystemMemory() const { return mSystemMemory; }
-
-		/**
-		 * @brief	Returns whether or not this buffer is currently locked.
-		 */
-        bool isLocked() const { return mIsLocked; }	
-
-	protected:
-		friend class HardwareBufferManager;
-
-		/**
-		 * @brief	Constructs a new buffer.
-		 *
-		 * @param	usage			Determines most common usage of the buffer. Usually has effect on what
-		 *							type of memory will be buffer allocated in but that depends on render API.
-		 *							Specify dynamic if you plan on modifying it often, static otherwise.
-		 * @param	systemMemory	If enabled the the buffer will be kept in the system memory. System memory
-		 *							buffers are often used as a source or destination for copies from/to other
-		 *							buffers. Some APIs don't allow reading from non-system memory buffers.
-		 */
-		HardwareBuffer(GpuBufferUsage usage, bool systemMemory)
-			: mUsage(usage), mIsLocked(false), mSystemMemory(systemMemory)
-		{  }
-
-		/**
-		 * @copydoc	lock
-		 */
-		virtual void* lockImpl(UINT32 offset, UINT32 length, GpuLockOptions options) = 0;
-
-		/**
-		 * @copydoc	unlock
-		 */
-		virtual void unlockImpl() = 0;
-
-	protected:
-		UINT32 mSizeInBytes;
-		GpuBufferUsage mUsage;
-		bool mIsLocked;
-		UINT32 mLockStart;
-		UINT32 mLockSize;
-		bool mSystemMemory;
-    };
-}
-
-
+#pragma once
+
+#include "BsCorePrerequisites.h"
+
+namespace BansheeEngine 
+{
+	/** @cond INTERNAL */
+	/** @addtogroup RenderAPI
+	 *  @{
+	 */
+
+	/**
+	 * Abstract class defining common features of hardware buffers. Hardware buffers usually represent areas of memory the 
+	 * GPU or the driver can access directly.
+	 *
+	 * @note	Core thread only.
+	 * @note	Be aware that reading from non-system memory hardware buffers is usually slow and should be avoided.
+	 */
+	class BS_CORE_EXPORT HardwareBuffer
+    {
+    public:
+        virtual ~HardwareBuffer() {}
+
+		/**
+		 * Locks a portion of the buffer and returns pointer to the locked area. You must call unlock() when done.
+		 *
+		 * @param[in]	offset	Offset in bytes from which to lock the buffer.
+		 * @param[in]	length	Length of the area you want to lock, in bytes.
+		 * @param[in]	options	Signifies what you want to do with the returned pointer. Caller must ensure not to do 
+		 *						anything he hasn't requested (e.g. don't try to read from the buffer unless you requested 
+		 *						it here).
+		 */
+		virtual void* lock(UINT32 offset, UINT32 length, GpuLockOptions options)
+        {
+            assert(!isLocked() && "Cannot lock this buffer, it is already locked!");
+            void* ret = lockImpl(offset, length, options);
+            mIsLocked = true;
+
+			mLockStart = offset;
+			mLockSize = length;
+            return ret;
+        }
+
+		/**
+		 * Locks the entire buffer and returns pointer to the locked area. You must call unlock() when done.
+		 *
+		 * @param[in]	options	Signifies what you want to do with the returned pointer. Caller must ensure not to do 
+		 *						anything he hasn't requested (e.g. don't try to read from the buffer unless you requested
+		 *						it here).
+		 */
+        void* lock(GpuLockOptions options)
+        {
+            return this->lock(0, mSizeInBytes, options);
+        }
+
+		/**	Releases the lock on this buffer.  */
+		virtual void unlock()
+        {
+            assert(isLocked() && "Cannot unlock this buffer, it is not locked!");
+
+            unlockImpl();
+            mIsLocked = false;
+        }
+
+		/**
+		 * Reads data from a portion of the buffer and copies it to the destination buffer. Caller must ensure destination 
+		 * buffer is large enough.
+		 *
+		 * @param[in]	offset	Offset in bytes from which to copy the data.
+		 * @param[in]	length	Length of the area you want to copy, in bytes.
+		 * @param[in]	dest	Destination buffer large enough to store the read data.
+		 */
+        virtual void readData(UINT32 offset, UINT32 length, void* dest) = 0;
+
+		/**
+		 * Writes data into a portion of the buffer from the source memory. 
+		 *
+		 * @param[in]	offset		Offset in bytes from which to copy the data.
+		 * @param[in]	length		Length of the area you want to copy, in bytes.
+		 * @param[in]	source		Source buffer containing the data to write.
+		 * @param[in]	writeFlags	Optional write flags that may affect performance.
+		 */
+        virtual void writeData(UINT32 offset, UINT32 length, const void* source,
+				BufferWriteType writeFlags = BufferWriteType::Normal) = 0;
+
+		/**
+		 * Copies data from a specific portion of the source buffer into a specific portion of this buffer.
+		 *
+		 * @param[in]	srcBuffer			Buffer to copy from.
+		 * @param[in]	srcOffset			Offset into the source buffer to start copying from, in bytes.
+		 * @param[in]	dstOffset			Offset into this buffer to start copying to, in bytes.
+		 * @param[in]	length				Size of the data to copy, in bytes.
+		 * @param[in]	discardWholeBuffer	Specify true if the data in the current buffer can be entirely discarded. This
+		 *									may improve performance.
+		 */
+		virtual void copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset, 
+			UINT32 dstOffset, UINT32 length, bool discardWholeBuffer = false)
+		{
+			const void *srcData = srcBuffer.lock(
+				srcOffset, length, GBL_READ_ONLY);
+			this->writeData(dstOffset, length, srcData, discardWholeBuffer ? BufferWriteType::Discard : BufferWriteType::Normal);
+			srcBuffer.unlock();
+		}
+
+		/**
+		 * Copies data from the provided buffer into this buffer. If the buffers differ in size, the smaller size is used.
+		 */
+		virtual void copyData(HardwareBuffer& srcBuffer)
+		{
+			UINT32 sz = std::min(getSizeInBytes(), srcBuffer.getSizeInBytes()); 
+			copyData(srcBuffer, 0, 0, sz, true);
+		}
+			
+		/** Returns the size of this buffer in bytes. */
+        UINT32 getSizeInBytes(void) const { return mSizeInBytes; }
+
+		/**	Returns the Usage flags with which this buffer was created. */
+        GpuBufferUsage getUsage() const { return mUsage; }
+
+		/**	Returns whether this buffer is held in system memory. */
+		bool isSystemMemory() const { return mSystemMemory; }
+
+		/**	Returns whether or not this buffer is currently locked. */
+        bool isLocked() const { return mIsLocked; }	
+
+	protected:
+		friend class HardwareBufferManager;
+
+		/**
+		 * Constructs a new buffer.
+		 *
+		 * @param[in]	usage			Determines most common usage of the buffer. Usually has effect on what type of 
+		 *								memory the buffer will be allocated in, but that depends on the render API. Specify dynamic 
+		 *								if you plan on modifying it often, static otherwise.
+		 * @param[in]	systemMemory	If enabled the buffer will be kept in the system memory. System memory buffers 
+		 *								are often used as a source or destination for copies from/to other buffers. Some 
+		 *								APIs don't allow reading from non-system memory buffers.
+		 */
+		HardwareBuffer(GpuBufferUsage usage, bool systemMemory)
+			: mUsage(usage), mIsLocked(false), mSystemMemory(systemMemory)
+		{  }
+
+		/** @copydoc lock */
+		virtual void* lockImpl(UINT32 offset, UINT32 length, GpuLockOptions options) = 0;
+
+		/** @copydoc unlock */
+		virtual void unlockImpl() = 0;
+
+	protected:
+		UINT32 mSizeInBytes;
+		GpuBufferUsage mUsage;
+		bool mIsLocked;
+		UINT32 mLockStart;
+		UINT32 mLockSize;
+		bool mSystemMemory;
+    };
+
+	/** @} */
+	/** @endcond */
+}

+ 11 - 1
BansheeCore/Include/BsMaterial.h

@@ -12,7 +12,7 @@
 
 namespace BansheeEngine
 {
-	/** @addtogroup Material
+	/** @addtogroup Implementation
 	 *  @{
 	 */
 
@@ -659,6 +659,14 @@ namespace BansheeEngine
 		TechniqueType mBestTechnique;
 	};
 
+	/** @} */
+
+	/** @addtogroup Material
+	 *  @{
+	 */
+
+	/** @cond INTERNAL */
+
 	/** @copydoc MaterialBase */
 	class BS_CORE_EXPORT MaterialCore : public CoreObjectCore, public TMaterial<true>
 	{
@@ -684,6 +692,8 @@ namespace BansheeEngine
 		void syncToCore(const CoreSyncData& data) override;
 	};
 
+	/** @endcond */
+
 	/** @copydoc MaterialBase */
 	class BS_CORE_EXPORT Material : public Resource, public TMaterial<false>, public IResourceListener
 	{

+ 7 - 1
BansheeCore/Include/BsMaterialParam.h

@@ -5,7 +5,7 @@
 
 namespace BansheeEngine
 {
-	/** @addtogroup Material
+	/** @addtogroup Implementation
 	 *  @{
 	 */
 
@@ -255,6 +255,12 @@ namespace BansheeEngine
 		SPtr<Vector<TGpuParamSampState<true>>> mParams;
 	};
 
+	/** @} */
+
+	/** @addtogroup Material
+	 *  @{
+	 */
+
 	typedef TMaterialDataParam<float, false> MaterialParamFloat;
 	typedef TMaterialDataParam<Color, false> MaterialParamColor;
 	typedef TMaterialDataParam<Vector2, false> MaterialParamVec2;

+ 17 - 0
BansheeCore/Include/BsPass.h

@@ -27,6 +27,8 @@ namespace BansheeEngine
 		GpuProgramPtr computeProgram;
 	};
 
+	/** @cond INTERNAL */
+
 	/** Descriptor structure used for initializing a core thread variant of a shader pass. */
 	struct PASS_DESC_CORE
 	{
@@ -43,6 +45,13 @@ namespace BansheeEngine
 		SPtr<GpuProgramCore> computeProgram;
 	};
 
+	/** @endcond */
+	/** @} */
+
+	/** @addtogroup Implementation
+	 *  @{
+	 */
+
 	/** Contains all data used by a pass, templated so it may contain both core and sim thread data. */
 	template<bool Core>
 	struct TPassTypes
@@ -119,6 +128,13 @@ namespace BansheeEngine
 		PassDescType mData;
     };
 
+	/** @} */
+
+	/** @addtogroup Material
+	 *  @{
+	 */
+	/** @cond INTERNAL */
+
 	/**
 	 * @copydoc	PassBase
 	 *
@@ -142,6 +158,7 @@ namespace BansheeEngine
 		/** @copydoc CoreObjectCore::syncToCore */
 		void syncToCore(const CoreSyncData& data) override;
     };
+	/** @endcond */
 
 	/**
 	 * @copydoc	PassBase

+ 20 - 3
BansheeCore/Include/BsShader.h

@@ -50,6 +50,12 @@ namespace BansheeEngine
 		GpuParamBlockUsage usage;
 	};
 
+	/** @} */
+
+	/** @addtogroup Implementation
+	 *  @{
+	 */
+
 	/** Structure used for initializing a shader. */
 	template<bool Core>
 	struct BS_CORE_EXPORT TSHADER_DESC
@@ -213,9 +219,6 @@ namespace BansheeEngine
 		void addParameterInternal(const String& name, const String& gpuVariableName, GpuParamObjectType type, StringID rendererSemantic, UINT32 defaultValueIdx);
 	};
 
-	typedef TSHADER_DESC<true> SHADER_DESC_CORE;
-	typedef TSHADER_DESC<false> SHADER_DESC;
-
 	/**
 	 * Shader represents a collection of techniques. They are used in Materials, which can be considered as instances of a 
 	 * Shader. Multiple materials may share the same shader but provide different parameters to it.
@@ -358,6 +361,16 @@ namespace BansheeEngine
 		UINT32 mId;
 	};
 
+	/** @} */
+
+	/** @addtogroup Material
+	 *  @{
+	 */
+
+	/** @cond INTERNAL */
+
+	typedef TSHADER_DESC<true> SHADER_DESC_CORE;
+	
 	/** @copydoc ShaderBase */
 	class BS_CORE_EXPORT ShaderCore : public CoreObjectCore, public TShader<true>
 	{
@@ -373,6 +386,10 @@ namespace BansheeEngine
 		static std::atomic<UINT32> mNextShaderId;
 	};
 
+	/** @endcond */
+
+	typedef TSHADER_DESC<false> SHADER_DESC;
+
 	/** @copydoc ShaderBase */
 	class BS_CORE_EXPORT Shader : public Resource, public TShader<false>
 	{

+ 11 - 1
BansheeCore/Include/BsTechnique.h

@@ -6,7 +6,7 @@
 
 namespace BansheeEngine
 {
-	/** @addtogroup Material
+	/** @addtogroup Implementation
 	 *  @{
 	 */
 
@@ -66,6 +66,14 @@ namespace BansheeEngine
 		Vector<SPtr<PassType>> mPasses;
 	};
 
+	/** @} */
+
+	/** @addtogroup Material
+	 *  @{
+	 */
+
+	/** @cond INTERNAL */
+
 	/**
 	 * @copydoc	TechniqueBase
 	 *
@@ -80,6 +88,8 @@ namespace BansheeEngine
 		static SPtr<TechniqueCore> create(const StringID& renderAPI, const StringID& renderer, const Vector<SPtr<PassCore>>& passes);
 	};
 
+	/** @endcond */
+
 	/**
 	 * @copydoc	TechniqueBase
 	 *

+ 107 - 111
BansheeCore/Source/BsGpuProgram.cpp

@@ -1,112 +1,108 @@
-#include "BsGpuProgram.h"
-#include "BsVector3.h"
-#include "BsVector4.h"
-#include "BsRenderAPICapabilities.h"
-#include "BsException.h"
-#include "BsRenderAPI.h"
-#include "BsAsyncOp.h"
-#include "BsGpuParams.h"
-#include "BsGpuProgramManager.h"
-#include "BsResources.h"
-#include "BsGpuProgramRTTI.h"
-
-namespace BansheeEngine
-{
-	GpuProgramProperties::GpuProgramProperties(const String& source, const String& entryPoint,
-		GpuProgramType gptype, GpuProgramProfile profile)
-		:mSource(source), mEntryPoint(entryPoint), mType(gptype), mProfile(profile)
-	{ }
-		
-	GpuProgramCore::GpuProgramCore(const String& source, const String& entryPoint,
-		GpuProgramType gptype, GpuProgramProfile profile, bool isAdjacencyInfoRequired)
-		: mProperties(source, entryPoint, gptype, profile), mIsCompiled(false),
-		mNeedsAdjacencyInfo(isAdjacencyInfoRequired)
-	{
-		mParametersDesc = bs_shared_ptr_new<GpuParamDesc>();
-	}
-
-	bool GpuProgramCore::isSupported() const
-    {
-		if (!isRequiredCapabilitiesSupported())
-			return false;
-
-		RenderAPICore* rs = BansheeEngine::RenderAPICore::instancePtr();
-		String profile = rs->getCapabilities()->gpuProgProfileToRSSpecificProfile(getProperties().getProfile());
-
-		return rs->getCapabilities()->isShaderProfileSupported(profile);
-    }
-
-	bool GpuProgramCore::isRequiredCapabilitiesSupported() const
-	{
-		return true;
-	}
-
-	SPtr<GpuParamsCore> GpuProgramCore::createParameters()
-	{
-		return GpuParamsCore::create(mParametersDesc, RenderAPICore::instance().getGpuProgramHasColumnMajorMatrices());
-	}
-
-	SPtr<GpuProgramCore> GpuProgramCore::create(const String& source, const String& entryPoint, const String& language, GpuProgramType gptype,
-		GpuProgramProfile profile, bool requiresAdjacency)
-	{
-		return GpuProgramCoreManager::instance().create(source, entryPoint, language, gptype, profile, requiresAdjacency);
-	}
-
-	GpuProgram::GpuProgram(const String& source, const String& entryPoint, const String& language,
-		GpuProgramType gptype, GpuProgramProfile profile, bool isAdjacencyInfoRequired) 
-		: mProperties(source, entryPoint, gptype, profile), mLanguage(language),
-		 mNeedsAdjacencyInfo(isAdjacencyInfoRequired)
-    {
-
-    }
-
-	bool GpuProgram::isCompiled() const
-	{
-		return getCore()->isCompiled();
-	}
-
-	String GpuProgram::getCompileErrorMessage() const
-	{
-		return getCore()->getCompileErrorMessage();
-	}
-
-	GpuParamsPtr GpuProgram::createParameters()
-	{
-		return GpuParams::create(getCore()->getParamDesc(), RenderAPICore::instance().getGpuProgramHasColumnMajorMatrices());
-	}
-
-	GpuParamDescPtr GpuProgram::getParamDesc() const
-	{
-		return getCore()->getParamDesc();
-	}
-
-	SPtr<GpuProgramCore> GpuProgram::getCore() const
-	{
-		return std::static_pointer_cast<GpuProgramCore>(mCoreSpecific);
-	}
-
-	SPtr<CoreObjectCore> GpuProgram::createCore() const
-	{
-		return GpuProgramCoreManager::instance().createInternal(mProperties.getSource(), mProperties.getEntryPoint(),
-			mLanguage, mProperties.getType(), mProperties.getProfile(), mNeedsAdjacencyInfo);
-	}
-
-	GpuProgramPtr GpuProgram::create(const String& source, const String& entryPoint, const String& language, GpuProgramType gptype,
-		GpuProgramProfile profile, bool requiresAdjacency)
-	{
-		return GpuProgramManager::instance().create(source, entryPoint, language, gptype, profile, requiresAdjacency);
-	}
-
-	/************************************************************************/
-	/* 								SERIALIZATION                      		*/
-	/************************************************************************/
-	RTTITypeBase* GpuProgram::getRTTIStatic()
-	{
-		return GpuProgramRTTI::instance();
-	}
-
-	RTTITypeBase* GpuProgram::getRTTI() const
-	{
-		return GpuProgram::getRTTIStatic();
-	}
+#include "BsGpuProgram.h"
+#include "BsRenderAPICapabilities.h"
+#include "BsRenderAPI.h"
+#include "BsGpuParams.h"
+#include "BsGpuParamDesc.h"
+#include "BsGpuProgramManager.h"
+#include "BsGpuProgramRTTI.h"
+
+namespace BansheeEngine
+{
+	GpuProgramProperties::GpuProgramProperties(const String& source, const String& entryPoint,
+		GpuProgramType gptype, GpuProgramProfile profile)
+		:mSource(source), mEntryPoint(entryPoint), mType(gptype), mProfile(profile)
+	{ }
+		
+	GpuProgramCore::GpuProgramCore(const String& source, const String& entryPoint, // Stores the program description; the object starts out flagged as not compiled.
+		GpuProgramType gptype, GpuProgramProfile profile, bool isAdjacencyInfoRequired)
+		: mProperties(source, entryPoint, gptype, profile), mIsCompiled(false),
+		mNeedsAdjacencyInfo(isAdjacencyInfoRequired)
+	{
+		mParametersDesc = bs_shared_ptr_new<GpuParamDesc>(); // Begin with a freshly constructed parameter description.
+	}
+
+	bool GpuProgramCore::isSupported() const // Returns true only if the capability pre-check passes and the render API reports the program's profile as supported.
+    {
+		if (!isRequiredCapabilitiesSupported()) // Fail fast before querying the render API.
+			return false;
+
+		RenderAPICore* rs = BansheeEngine::RenderAPICore::instancePtr(); // NOTE(review): pointer is used without a null check - confirm the render API always exists at this point.
+		String profile = rs->getCapabilities()->gpuProgProfileToRSSpecificProfile(getProperties().getProfile()); // Map the generic profile enum to the render API's own profile string.
+
+		return rs->getCapabilities()->isShaderProfileSupported(profile);
+    }
+
+	bool GpuProgramCore::isRequiredCapabilitiesSupported() const // Capability pre-check consulted by isSupported().
+	{
+		return true; // This implementation imposes no additional capability requirements.
+	}
+
+	SPtr<GpuParamsCore> GpuProgramCore::createParameters() // Builds a GpuParamsCore object from this program's parameter description.
+	{
+		return GpuParamsCore::create(mParametersDesc, RenderAPICore::instance().getGpuProgramHasColumnMajorMatrices()); // The matrix-layout flag is queried from the active render API.
+	}
+
+	SPtr<GpuProgramCore> GpuProgramCore::create(const String& source, const String& entryPoint, const String& language, GpuProgramType gptype, // Static factory; all arguments are passed through unchanged.
+		GpuProgramProfile profile, bool requiresAdjacency)
+	{
+		return GpuProgramCoreManager::instance().create(source, entryPoint, language, gptype, profile, requiresAdjacency); // Creation is delegated to the GpuProgramCoreManager singleton.
+	}
+
+	GpuProgram::GpuProgram(const String& source, const String& entryPoint, const String& language,
+		GpuProgramType gptype, GpuProgramProfile profile, bool isAdjacencyInfoRequired) 
+		: mProperties(source, entryPoint, gptype, profile), mLanguage(language),
+		 mNeedsAdjacencyInfo(isAdjacencyInfoRequired)
+    {
+
+    }
+
+	bool GpuProgram::isCompiled() const
+	{
+		return getCore()->isCompiled();
+	}
+
+	String GpuProgram::getCompileErrorMessage() const
+	{
+		return getCore()->getCompileErrorMessage();
+	}
+
+	GpuParamsPtr GpuProgram::createParameters() // Builds a GpuParams object using the parameter description held by the core object.
+	{
+		return GpuParams::create(getCore()->getParamDesc(), RenderAPICore::instance().getGpuProgramHasColumnMajorMatrices()); // The matrix-layout flag is queried from the active render API.
+	}
+
+	GpuParamDescPtr GpuProgram::getParamDesc() const
+	{
+		return getCore()->getParamDesc();
+	}
+
+	SPtr<GpuProgramCore> GpuProgram::getCore() const // Returns the stored core object viewed as a GpuProgramCore.
+	{
+		return std::static_pointer_cast<GpuProgramCore>(mCoreSpecific); // Unchecked static cast; presumably mCoreSpecific always comes from createCore() - confirm.
+	}
+
+	SPtr<CoreObjectCore> GpuProgram::createCore() const // Creates the core counterpart from this object's stored properties, language and adjacency flag.
+	{
+		return GpuProgramCoreManager::instance().createInternal(mProperties.getSource(), mProperties.getEntryPoint(), // Uses createInternal(), not the create() used by GpuProgramCore::create.
+			mLanguage, mProperties.getType(), mProperties.getProfile(), mNeedsAdjacencyInfo);
+	}
+
+	GpuProgramPtr GpuProgram::create(const String& source, const String& entryPoint, const String& language, GpuProgramType gptype,
+		GpuProgramProfile profile, bool requiresAdjacency)
+	{
+		return GpuProgramManager::instance().create(source, entryPoint, language, gptype, profile, requiresAdjacency);
+	}
+
+	/************************************************************************/
+	/* 								SERIALIZATION                      		*/
+	/************************************************************************/
+	RTTITypeBase* GpuProgram::getRTTIStatic()
+	{
+		return GpuProgramRTTI::instance();
+	}
+
+	RTTITypeBase* GpuProgram::getRTTI() const
+	{
+		return GpuProgram::getRTTIStatic();
+	}
 }

+ 453 - 457
BansheeCore/Source/BsRenderAPI.cpp

@@ -1,458 +1,454 @@
-#include "BsRenderAPI.h"
-
-#include "BsCoreThread.h"
-#include "BsViewport.h"
-#include "BsException.h"
-#include "BsRenderTarget.h"
-#include "BsRenderWindow.h"
-#include "BsPixelBuffer.h"
-#include "BsOcclusionQuery.h"
-#include "BsResource.h"
-#include "BsCoreThread.h"
-#include "BsMesh.h"
-#include "BsProfilerCPU.h"
-#include "BsRenderStats.h"
-#include "BsGpuParams.h"
-#include "BsBlendState.h"
-#include "BsDepthStencilState.h"
-#include "BsRasterizerState.h"
-
-using namespace std::placeholders;
-
-namespace BansheeEngine 
-{
-	void RenderAPI::disableTextureUnit(CoreAccessor& accessor, GpuProgramType gptype, UINT16 texUnit)
-	{
-		accessor.queueCommand(std::bind(&RenderAPICore::disableTextureUnit, RenderAPICore::instancePtr(), gptype, texUnit));
-	}
-
-	void RenderAPI::setTexture(CoreAccessor& accessor, GpuProgramType gptype, UINT16 unit, bool enabled, const TexturePtr &texPtr)
-	{
-		accessor.queueCommand(std::bind(&RenderAPICore::setTexture, RenderAPICore::instancePtr(), gptype, unit, enabled, texPtr->getCore()));
-	}
-
-	void RenderAPI::setLoadStoreTexture(CoreAccessor& accessor, GpuProgramType gptype, UINT16 unit, bool enabled, const TexturePtr& texPtr,
-		const TextureSurface& surface)
-	{
-		accessor.queueCommand(std::bind(&RenderAPICore::setLoadStoreTexture, RenderAPICore::instancePtr(), gptype, unit, enabled, texPtr->getCore(),
-			surface));
-	}
-
-	void RenderAPI::setSamplerState(CoreAccessor& accessor, GpuProgramType gptype, UINT16 texUnit, const SamplerStatePtr& samplerState)
-	{
-		accessor.queueCommand(std::bind(&RenderAPICore::setSamplerState, RenderAPICore::instancePtr(), gptype, texUnit, samplerState->getCore()));
-	}
-
-	void RenderAPI::setBlendState(CoreAccessor& accessor, const BlendStatePtr& blendState)
-	{
-		accessor.queueCommand(std::bind(&RenderAPICore::setBlendState, RenderAPICore::instancePtr(), blendState->getCore()));
-	}
-
-	void RenderAPI::setRasterizerState(CoreAccessor& accessor, const RasterizerStatePtr& rasterizerState)
-	{
-		accessor.queueCommand(std::bind(&RenderAPICore::setRasterizerState, RenderAPICore::instancePtr(), rasterizerState->getCore()));
-	}
-
-	void RenderAPI::setDepthStencilState(CoreAccessor& accessor, const DepthStencilStatePtr& depthStencilState, UINT32 stencilRefValue)
-	{
-		accessor.queueCommand(std::bind(&RenderAPICore::setDepthStencilState, RenderAPICore::instancePtr(), depthStencilState->getCore(), stencilRefValue));
-	}
-
-	void RenderAPI::setVertexBuffers(CoreAccessor& accessor, UINT32 index, const Vector<VertexBufferPtr>& buffers)
-	{
-		Vector<SPtr<VertexBufferCore>> coreBuffers(buffers.size());
-		for (UINT32 i = 0; i < (UINT32)buffers.size(); i++)
-			coreBuffers[i] = buffers[i] != nullptr ? buffers[i]->getCore() : nullptr;
-
-		std::function<void(RenderAPICore*, UINT32, const Vector<SPtr<VertexBufferCore>>&)> resizeFunc =
-			[](RenderAPICore* rs, UINT32 idx, const Vector<SPtr<VertexBufferCore>>& _buffers)
-		{
-			rs->setVertexBuffers(idx, (SPtr<VertexBufferCore>*)_buffers.data(), (UINT32)_buffers.size());
-		};
-
-		accessor.queueCommand(std::bind(resizeFunc, RenderAPICore::instancePtr(), index, coreBuffers));
-	}
-
-	void RenderAPI::setIndexBuffer(CoreAccessor& accessor, const IndexBufferPtr& buffer)
-	{
-		accessor.queueCommand(std::bind(&RenderAPICore::setIndexBuffer, RenderAPICore::instancePtr(), buffer->getCore()));
-	}
-
-	void RenderAPI::setVertexDeclaration(CoreAccessor& accessor, const VertexDeclarationPtr& vertexDeclaration)
-	{
-		accessor.queueCommand(std::bind(&RenderAPICore::setVertexDeclaration, RenderAPICore::instancePtr(), vertexDeclaration->getCore()));
-	}
-
-	void RenderAPI::setViewport(CoreAccessor& accessor, const Rect2& vp)
-	{
-		accessor.queueCommand(std::bind(&RenderAPICore::setViewport, RenderAPICore::instancePtr(), vp));
-	}
-
-	void RenderAPI::setDrawOperation(CoreAccessor& accessor, DrawOperationType op)
-	{
-		accessor.queueCommand(std::bind(&RenderAPICore::setDrawOperation, RenderAPICore::instancePtr(), op));
-	}
-
-	void RenderAPI::setClipPlanes(CoreAccessor& accessor, const PlaneList& clipPlanes)
-	{
-		accessor.queueCommand(std::bind(&RenderAPICore::setClipPlanes, RenderAPICore::instancePtr(), clipPlanes));
-	}
-
-	void RenderAPI::addClipPlane(CoreAccessor& accessor, const Plane& p)
-	{
-		accessor.queueCommand(std::bind(&RenderAPICore::addClipPlane, RenderAPICore::instancePtr(), p));
-	}
-
-	void RenderAPI::resetClipPlanes(CoreAccessor& accessor)
-	{
-		accessor.queueCommand(std::bind(&RenderAPICore::resetClipPlanes, RenderAPICore::instancePtr()));
-	}
-
-	void RenderAPI::setScissorTest(CoreAccessor& accessor, UINT32 left, UINT32 top, UINT32 right, UINT32 bottom)
-	{
-		accessor.queueCommand(std::bind(&RenderAPICore::setScissorRect, RenderAPICore::instancePtr(), left, top, right, bottom));
-	}
-
-	void RenderAPI::setRenderTarget(CoreAccessor& accessor, const RenderTargetPtr& target)
-	{
-		accessor.queueCommand(std::bind(&RenderAPICore::setRenderTarget, RenderAPICore::instancePtr(), target->getCore()));
-	}
-
-	void RenderAPI::bindGpuProgram(CoreAccessor& accessor, const GpuProgramPtr& prg)
-	{
-		prg->syncToCore(accessor);
-		accessor.queueCommand(std::bind(&RenderAPICore::bindGpuProgram, RenderAPICore::instancePtr(), prg->getCore()));
-	}
-
-	void RenderAPI::unbindGpuProgram(CoreAccessor& accessor, GpuProgramType gptype)
-	{
-		accessor.queueCommand(std::bind(&RenderAPICore::unbindGpuProgram, RenderAPICore::instancePtr(), gptype));
-	}
-
-	void RenderAPI::setConstantBuffers(CoreAccessor& accessor, GpuProgramType gptype, const GpuParamsPtr& params)
-	{
-		accessor.queueCommand(std::bind(&RenderAPICore::setConstantBuffers, RenderAPICore::instancePtr(), gptype, params->getCore()));
-	}
-
-	void RenderAPI::setGpuParams(CoreAccessor& accessor, GpuProgramType gptype, const GpuParamsPtr& params)
-	{
-		accessor.queueCommand(std::bind(&RenderAPICore::setGpuParams, RenderAPICore::instancePtr(), gptype, params->getCore()));
-	}
-
-	void RenderAPI::beginRender(CoreAccessor& accessor)
-	{
-		accessor.queueCommand(std::bind(&RenderAPICore::beginFrame, RenderAPICore::instancePtr()));
-	}
-
-	void RenderAPI::endRender(CoreAccessor& accessor)
-	{
-		accessor.queueCommand(std::bind(&RenderAPICore::endFrame, RenderAPICore::instancePtr()));
-	}
-
-	void RenderAPI::clearRenderTarget(CoreAccessor& accessor, UINT32 buffers, const Color& color, float depth, UINT16 stencil)
-	{
-		accessor.queueCommand(std::bind(&RenderAPICore::clearRenderTarget, RenderAPICore::instancePtr(), buffers, color, depth, stencil));
-	}
-
-	void RenderAPI::clearViewport(CoreAccessor& accessor, UINT32 buffers, const Color& color, float depth, UINT16 stencil)
-	{
-		accessor.queueCommand(std::bind(&RenderAPICore::clearViewport, RenderAPICore::instancePtr(), buffers, color, depth, stencil));
-	}
-
-	void RenderAPI::swapBuffers(CoreAccessor& accessor, const RenderTargetPtr& target)
-	{
-		accessor.queueCommand(std::bind(&RenderAPICore::swapBuffers, RenderAPICore::instancePtr(), target->getCore()));
-	}
-
-	void RenderAPI::draw(CoreAccessor& accessor, UINT32 vertexOffset, UINT32 vertexCount)
-	{
-		accessor.queueCommand(std::bind(&RenderAPICore::draw, RenderAPICore::instancePtr(), vertexOffset, vertexCount));
-	}
-
-	void RenderAPI::drawIndexed(CoreAccessor& accessor, UINT32 startIndex, UINT32 indexCount, UINT32 vertexOffset, UINT32 vertexCount)
-	{
-		accessor.queueCommand(std::bind(&RenderAPICore::drawIndexed, RenderAPICore::instancePtr(), startIndex, indexCount, vertexOffset, vertexCount));
-	}
-
-	const VideoModeInfo& RenderAPI::getVideoModeInfo()
-	{
-		return RenderAPICore::instance().getVideoModeInfo();
-	}
-
-	VertexElementType RenderAPI::getColorVertexElementType()
-	{
-		return RenderAPICore::instance().getColorVertexElementType();
-	}
-
-	void RenderAPI::convertProjectionMatrix(const Matrix4& matrix, Matrix4& dest)
-	{
-		RenderAPICore::instance().convertProjectionMatrix(matrix, dest);
-	}
-
-	float RenderAPI::getHorizontalTexelOffset()
-	{
-		return RenderAPICore::instance().getHorizontalTexelOffset();
-	}
-
-	float RenderAPI::getVerticalTexelOffset()
-	{
-		return RenderAPICore::instance().getVerticalTexelOffset();
-	}
-
-	float RenderAPI::getMinimumDepthInputValue()
-	{
-		return RenderAPICore::instance().getMinimumDepthInputValue();
-	}
-
-	float RenderAPI::getMaximumDepthInputValue()
-	{
-		return RenderAPICore::instance().getMaximumDepthInputValue();
-	}
-
-	bool RenderAPI::getVertexColorFlipRequired()
-	{
-		return RenderAPICore::instance().getVertexColorFlipRequired();
-	}
-
-    RenderAPICore::RenderAPICore()
-        : mCullingMode(CULL_COUNTERCLOCKWISE)
-        , mDisabledTexUnitsFrom(0)
-        , mVertexProgramBound(false)
-		, mGeometryProgramBound(false)
-        , mFragmentProgramBound(false)
-		, mDomainProgramBound(false)
-		, mHullProgramBound(false)
-		, mComputeProgramBound(false)
-		, mClipPlanesDirty(true)
-		, mCurrentCapabilities(nullptr)
-    {
-    }
-
-    RenderAPICore::~RenderAPICore()
-    {
-		// Base classes need to call virtual destroy_internal method instead of a destructor
-
-		bs_delete(mCurrentCapabilities);
-		mCurrentCapabilities = nullptr;
-    }
-
-	RenderWindowPtr RenderAPICore::initialize(const RENDER_WINDOW_DESC& primaryWindowDesc)
-	{
-		gCoreThread().queueCommand(std::bind(&RenderAPICore::initializePrepare, this), true);
-
-		RENDER_WINDOW_DESC windowDesc = primaryWindowDesc;
-		RenderWindowPtr renderWindow = RenderWindow::create(windowDesc, nullptr);
-
-		gCoreThread().queueCommand(std::bind(&RenderAPICore::initializeFinalize, this, renderWindow->getCore()), true);
-
-		return renderWindow;
-	}
-
-	void RenderAPICore::initializePrepare()
-	{
-		// Do nothing
-	}
-
-	void RenderAPICore::initializeFinalize(const SPtr<RenderWindowCore>& primaryWindow)
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		mVertexProgramBound = false;
-		mGeometryProgramBound = false;
-		mFragmentProgramBound = false;
-		mDomainProgramBound = false;
-		mHullProgramBound = false;
-		mComputeProgramBound = false;
-	}
-
-	void RenderAPICore::destroy()
-	{
-		gCoreAccessor().queueCommand(std::bind(&RenderAPICore::destroyCore, this));
-		gCoreThread().submitAccessors(true);
-	}
-
-	void RenderAPICore::destroyCore()
-	{
-		mActiveRenderTarget = nullptr;
-	}
-
-	const RenderAPICapabilities* RenderAPICore::getCapabilities(void) const 
-	{ 
-		return mCurrentCapabilities; 
-	}
-
-	const DriverVersion& RenderAPICore::getDriverVersion(void) const 
-	{ 
-		THROW_IF_NOT_CORE_THREAD;
-
-		return mDriverVersion; 
-	}
-
-    void RenderAPICore::disableTextureUnit(GpuProgramType gptype, UINT16 texUnit)
-    {
-		THROW_IF_NOT_CORE_THREAD;
-
-		setTexture(gptype, texUnit, false, SPtr<TextureCore>());
-    }
-
-	void RenderAPICore::addClipPlane(const Plane &p)
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		mClipPlanes.push_back(p);
-		mClipPlanesDirty = true;
-	}
-
-	void RenderAPICore::setClipPlanes(const PlaneList& clipPlanes)
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		if (clipPlanes != mClipPlanes)
-		{
-			mClipPlanes = clipPlanes;
-			mClipPlanesDirty = true;
-		}
-	}
-
-	void RenderAPICore::resetClipPlanes()
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		if (!mClipPlanes.empty())
-		{
-			mClipPlanes.clear();
-			mClipPlanesDirty = true;
-		}
-	}
-
-	void RenderAPICore::bindGpuProgram(const SPtr<GpuProgramCore>& prg)
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		switch(prg->getProperties().getType())
-		{
-		case GPT_VERTEX_PROGRAM:
-			if (!mVertexProgramBound && !mClipPlanes.empty())
-				mClipPlanesDirty = true;
-
-			mVertexProgramBound = true;
-			break;
-		case GPT_GEOMETRY_PROGRAM:
-			mGeometryProgramBound = true;
-			break;
-		case GPT_FRAGMENT_PROGRAM:
-			mFragmentProgramBound = true;
-			break;
-		case GPT_DOMAIN_PROGRAM:
-			mDomainProgramBound = true;
-			break;
-		case GPT_HULL_PROGRAM:
-			mHullProgramBound = true;
-			break;
-		case GPT_COMPUTE_PROGRAM:
-			mComputeProgramBound = true;
-			break;
-		}
-	}
-
-	void RenderAPICore::unbindGpuProgram(GpuProgramType gptype)
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		switch(gptype)
-		{
-		case GPT_VERTEX_PROGRAM:
-			if (mVertexProgramBound && !mClipPlanes.empty())
-				mClipPlanesDirty = true;
-
-			mVertexProgramBound = false;
-			break;
-		case GPT_GEOMETRY_PROGRAM:
-			mGeometryProgramBound = false;
-			break;
-		case GPT_FRAGMENT_PROGRAM:
-			mFragmentProgramBound = false;
-			break;
-		case GPT_DOMAIN_PROGRAM:
-			mDomainProgramBound = false;
-			break;
-		case GPT_HULL_PROGRAM:
-			mHullProgramBound = false;
-			break;
-		case GPT_COMPUTE_PROGRAM:
-			mComputeProgramBound = false;
-			break;
-		}
-	}
-
-	bool RenderAPICore::isGpuProgramBound(GpuProgramType gptype)
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-	    switch(gptype)
-	    {
-        case GPT_VERTEX_PROGRAM:
-            return mVertexProgramBound;
-        case GPT_GEOMETRY_PROGRAM:
-            return mGeometryProgramBound;
-        case GPT_FRAGMENT_PROGRAM:
-            return mFragmentProgramBound;
-		case GPT_DOMAIN_PROGRAM:
-			return mDomainProgramBound;
-		case GPT_HULL_PROGRAM:
-			return mHullProgramBound;
-		case GPT_COMPUTE_PROGRAM:
-			return mComputeProgramBound;
-	    }
-
-        return false;
-	}
-
-	void RenderAPICore::setGpuParams(GpuProgramType gptype, const SPtr<GpuParamsCore>& params)
-	{
-		const GpuParamDesc& paramDesc = params->getParamDesc();
-
-		for (auto iter = paramDesc.samplers.begin(); iter != paramDesc.samplers.end(); ++iter)
-		{
-			SPtr<SamplerStateCore> samplerState = params->getSamplerState(iter->second.slot);
-
-			if (samplerState == nullptr)
-				setSamplerState(gptype, iter->second.slot, SamplerStateCore::getDefault());
-			else
-				setSamplerState(gptype, iter->second.slot, samplerState);
-		}
-
-		for (auto iter = paramDesc.textures.begin(); iter != paramDesc.textures.end(); ++iter)
-		{
-			SPtr<TextureCore> texture = params->getTexture(iter->second.slot);
-
-			if (!params->isLoadStoreTexture(iter->second.slot))
-			{
-				if (texture == nullptr)
-					setTexture(gptype, iter->second.slot, false, nullptr);
-				else
-					setTexture(gptype, iter->second.slot, true, texture);
-			}
-			else
-			{
-				const TextureSurface& surface = params->getLoadStoreSurface(iter->second.slot);
-
-				if (texture == nullptr)
-					setLoadStoreTexture(gptype, iter->second.slot, false, nullptr, surface);
-				else
-					setLoadStoreTexture(gptype, iter->second.slot, true, texture, surface);
-			}
-		}
-
-		setConstantBuffers(gptype, params);
-	}
-
-	void RenderAPICore::swapBuffers(const SPtr<RenderTargetCore>& target)
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		target->swapBuffers();
-
-		BS_INC_RENDER_STAT(NumPresents);
-	}
+#include "BsRenderAPI.h"
+
+#include "BsCoreThread.h"
+#include "BsViewport.h"
+#include "BsRenderTarget.h"
+#include "BsRenderWindow.h"
+#include "BsResource.h"
+#include "BsMesh.h"
+#include "BsRenderStats.h"
+#include "BsGpuParams.h"
+#include "BsBlendState.h"
+#include "BsDepthStencilState.h"
+#include "BsRasterizerState.h"
+#include "BsGpuParamDesc.h"
+
+using namespace std::placeholders;
+
+namespace BansheeEngine 
+{
+	void RenderAPI::disableTextureUnit(CoreAccessor& accessor, GpuProgramType gptype, UINT16 texUnit)
+	{
+		accessor.queueCommand(std::bind(&RenderAPICore::disableTextureUnit, RenderAPICore::instancePtr(), gptype, texUnit));
+	}
+
+	void RenderAPI::setTexture(CoreAccessor& accessor, GpuProgramType gptype, UINT16 unit, bool enabled, const TexturePtr &texPtr) // Queues RenderAPICore::setTexture on the given accessor.
+	{
+		accessor.queueCommand(std::bind(&RenderAPICore::setTexture, RenderAPICore::instancePtr(), gptype, unit, enabled, texPtr->getCore())); // NOTE(review): texPtr->getCore() runs here at queue time, so a null texPtr is dereferenced even when enabled is false - confirm callers never pass null.
+	}
+
+	void RenderAPI::setLoadStoreTexture(CoreAccessor& accessor, GpuProgramType gptype, UINT16 unit, bool enabled, const TexturePtr& texPtr,
+		const TextureSurface& surface)
+	{
+		accessor.queueCommand(std::bind(&RenderAPICore::setLoadStoreTexture, RenderAPICore::instancePtr(), gptype, unit, enabled, texPtr->getCore(),
+			surface));
+	}
+
+	void RenderAPI::setSamplerState(CoreAccessor& accessor, GpuProgramType gptype, UINT16 texUnit, const SamplerStatePtr& samplerState)
+	{
+		accessor.queueCommand(std::bind(&RenderAPICore::setSamplerState, RenderAPICore::instancePtr(), gptype, texUnit, samplerState->getCore()));
+	}
+
+	void RenderAPI::setBlendState(CoreAccessor& accessor, const BlendStatePtr& blendState)
+	{
+		accessor.queueCommand(std::bind(&RenderAPICore::setBlendState, RenderAPICore::instancePtr(), blendState->getCore()));
+	}
+
+	void RenderAPI::setRasterizerState(CoreAccessor& accessor, const RasterizerStatePtr& rasterizerState)
+	{
+		accessor.queueCommand(std::bind(&RenderAPICore::setRasterizerState, RenderAPICore::instancePtr(), rasterizerState->getCore()));
+	}
+
+	void RenderAPI::setDepthStencilState(CoreAccessor& accessor, const DepthStencilStatePtr& depthStencilState, UINT32 stencilRefValue)
+	{
+		accessor.queueCommand(std::bind(&RenderAPICore::setDepthStencilState, RenderAPICore::instancePtr(), depthStencilState->getCore(), stencilRefValue));
+	}
+
+	void RenderAPI::setVertexBuffers(CoreAccessor& accessor, UINT32 index, const Vector<VertexBufferPtr>& buffers) // Queues RenderAPICore::setVertexBuffers with the core counterparts of the given buffers.
+	{
+		Vector<SPtr<VertexBufferCore>> coreBuffers(buffers.size());
+		for (UINT32 i = 0; i < (UINT32)buffers.size(); i++)
+			coreBuffers[i] = buffers[i] != nullptr ? buffers[i]->getCore() : nullptr; // Null entries stay null instead of being dereferenced.
+
+		std::function<void(RenderAPICore*, UINT32, const Vector<SPtr<VertexBufferCore>>&)> resizeFunc = // Adapter from a vector argument to the pointer + count form of the core call.
+			[](RenderAPICore* rs, UINT32 idx, const Vector<SPtr<VertexBufferCore>>& _buffers)
+		{
+			rs->setVertexBuffers(idx, (SPtr<VertexBufferCore>*)_buffers.data(), (UINT32)_buffers.size()); // The C-style cast drops the const qualifier from data().
+		};
+
+		accessor.queueCommand(std::bind(resizeFunc, RenderAPICore::instancePtr(), index, coreBuffers)); // std::bind stores its own copy of coreBuffers, so the queued command does not refer to this local.
+	}
+
+	void RenderAPI::setIndexBuffer(CoreAccessor& accessor, const IndexBufferPtr& buffer)
+	{
+		accessor.queueCommand(std::bind(&RenderAPICore::setIndexBuffer, RenderAPICore::instancePtr(), buffer->getCore()));
+	}
+
+	void RenderAPI::setVertexDeclaration(CoreAccessor& accessor, const VertexDeclarationPtr& vertexDeclaration)
+	{
+		accessor.queueCommand(std::bind(&RenderAPICore::setVertexDeclaration, RenderAPICore::instancePtr(), vertexDeclaration->getCore()));
+	}
+
+	void RenderAPI::setViewport(CoreAccessor& accessor, const Rect2& vp)
+	{
+		accessor.queueCommand(std::bind(&RenderAPICore::setViewport, RenderAPICore::instancePtr(), vp));
+	}
+
+	void RenderAPI::setDrawOperation(CoreAccessor& accessor, DrawOperationType op)
+	{
+		accessor.queueCommand(std::bind(&RenderAPICore::setDrawOperation, RenderAPICore::instancePtr(), op));
+	}
+
+	void RenderAPI::setClipPlanes(CoreAccessor& accessor, const PlaneList& clipPlanes)
+	{
+		accessor.queueCommand(std::bind(&RenderAPICore::setClipPlanes, RenderAPICore::instancePtr(), clipPlanes));
+	}
+
+	void RenderAPI::addClipPlane(CoreAccessor& accessor, const Plane& p)
+	{
+		accessor.queueCommand(std::bind(&RenderAPICore::addClipPlane, RenderAPICore::instancePtr(), p));
+	}
+
+	void RenderAPI::resetClipPlanes(CoreAccessor& accessor)
+	{
+		accessor.queueCommand(std::bind(&RenderAPICore::resetClipPlanes, RenderAPICore::instancePtr()));
+	}
+
+	void RenderAPI::setScissorTest(CoreAccessor& accessor, UINT32 left, UINT32 top, UINT32 right, UINT32 bottom) // Queues a scissor rectangle change on the given accessor.
+	{
+		accessor.queueCommand(std::bind(&RenderAPICore::setScissorRect, RenderAPICore::instancePtr(), left, top, right, bottom)); // Note the name difference: the core-side target is setScissorRect.
+	}
+
+	void RenderAPI::setRenderTarget(CoreAccessor& accessor, const RenderTargetPtr& target)
+	{
+		accessor.queueCommand(std::bind(&RenderAPICore::setRenderTarget, RenderAPICore::instancePtr(), target->getCore()));
+	}
+
+	void RenderAPI::bindGpuProgram(CoreAccessor& accessor, const GpuProgramPtr& prg) // Syncs the program through the accessor, then queues the bind of its core object.
+	{
+		prg->syncToCore(accessor); // Issued before the bind command is queued on the same accessor.
+		accessor.queueCommand(std::bind(&RenderAPICore::bindGpuProgram, RenderAPICore::instancePtr(), prg->getCore()));
+	}
+
+	void RenderAPI::unbindGpuProgram(CoreAccessor& accessor, GpuProgramType gptype)
+	{
+		accessor.queueCommand(std::bind(&RenderAPICore::unbindGpuProgram, RenderAPICore::instancePtr(), gptype));
+	}
+
+	void RenderAPI::setConstantBuffers(CoreAccessor& accessor, GpuProgramType gptype, const GpuParamsPtr& params)
+	{
+		accessor.queueCommand(std::bind(&RenderAPICore::setConstantBuffers, RenderAPICore::instancePtr(), gptype, params->getCore()));
+	}
+
+	void RenderAPI::setGpuParams(CoreAccessor& accessor, GpuProgramType gptype, const GpuParamsPtr& params)
+	{
+		accessor.queueCommand(std::bind(&RenderAPICore::setGpuParams, RenderAPICore::instancePtr(), gptype, params->getCore()));
+	}
+
+	void RenderAPI::beginRender(CoreAccessor& accessor) // Queues RenderAPICore::beginFrame on the given accessor.
+	{
+		accessor.queueCommand(std::bind(&RenderAPICore::beginFrame, RenderAPICore::instancePtr())); // The core-side counterpart is named beginFrame.
+	}
+
+	void RenderAPI::endRender(CoreAccessor& accessor) // Queues RenderAPICore::endFrame on the given accessor.
+	{
+		accessor.queueCommand(std::bind(&RenderAPICore::endFrame, RenderAPICore::instancePtr())); // The core-side counterpart is named endFrame.
+	}
+
+	void RenderAPI::clearRenderTarget(CoreAccessor& accessor, UINT32 buffers, const Color& color, float depth, UINT16 stencil)
+	{
+		accessor.queueCommand(std::bind(&RenderAPICore::clearRenderTarget, RenderAPICore::instancePtr(), buffers, color, depth, stencil));
+	}
+
+	void RenderAPI::clearViewport(CoreAccessor& accessor, UINT32 buffers, const Color& color, float depth, UINT16 stencil)
+	{
+		accessor.queueCommand(std::bind(&RenderAPICore::clearViewport, RenderAPICore::instancePtr(), buffers, color, depth, stencil));
+	}
+
+	void RenderAPI::swapBuffers(CoreAccessor& accessor, const RenderTargetPtr& target)
+	{
+		accessor.queueCommand(std::bind(&RenderAPICore::swapBuffers, RenderAPICore::instancePtr(), target->getCore()));
+	}
+
+	void RenderAPI::draw(CoreAccessor& accessor, UINT32 vertexOffset, UINT32 vertexCount)
+	{
+		accessor.queueCommand(std::bind(&RenderAPICore::draw, RenderAPICore::instancePtr(), vertexOffset, vertexCount));
+	}
+
+	void RenderAPI::drawIndexed(CoreAccessor& accessor, UINT32 startIndex, UINT32 indexCount, UINT32 vertexOffset, UINT32 vertexCount)
+	{
+		accessor.queueCommand(std::bind(&RenderAPICore::drawIndexed, RenderAPICore::instancePtr(), startIndex, indexCount, vertexOffset, vertexCount));
+	}
+
+	const VideoModeInfo& RenderAPI::getVideoModeInfo()
+	{
+		return RenderAPICore::instance().getVideoModeInfo();
+	}
+
+	VertexElementType RenderAPI::getColorVertexElementType()
+	{
+		return RenderAPICore::instance().getColorVertexElementType();
+	}
+
+	void RenderAPI::convertProjectionMatrix(const Matrix4& matrix, Matrix4& dest)
+	{
+		RenderAPICore::instance().convertProjectionMatrix(matrix, dest);
+	}
+
+	float RenderAPI::getHorizontalTexelOffset()
+	{
+		return RenderAPICore::instance().getHorizontalTexelOffset();
+	}
+
+	float RenderAPI::getVerticalTexelOffset()
+	{
+		return RenderAPICore::instance().getVerticalTexelOffset();
+	}
+
+	float RenderAPI::getMinimumDepthInputValue()
+	{
+		return RenderAPICore::instance().getMinimumDepthInputValue();
+	}
+
+	float RenderAPI::getMaximumDepthInputValue()
+	{
+		return RenderAPICore::instance().getMaximumDepthInputValue();
+	}
+
+	bool RenderAPI::getVertexColorFlipRequired()
+	{
+		return RenderAPICore::instance().getVertexColorFlipRequired();
+	}
+
+    RenderAPICore::RenderAPICore() // Sets initial state: no GPU program bound for any stage and no capabilities object yet.
+        : mCullingMode(CULL_COUNTERCLOCKWISE)
+        , mDisabledTexUnitsFrom(0)
+        , mVertexProgramBound(false)
+		, mGeometryProgramBound(false)
+        , mFragmentProgramBound(false)
+		, mDomainProgramBound(false)
+		, mHullProgramBound(false)
+		, mComputeProgramBound(false)
+		, mClipPlanesDirty(true) // Clip planes start out marked dirty.
+		, mCurrentCapabilities(nullptr) // Deleted in the destructor; presumably assigned by a concrete render API - confirm.
+    {
+    }
+
+    RenderAPICore::~RenderAPICore() // Releases the capabilities object.
+    {
+		// Base classes need to call virtual destroy_internal method instead of a destructor
+		// NOTE(review): no destroy_internal is defined in this part of the file, only destroy() and destroyCore(); "Base classes" may also mean derived classes - confirm and reword.
+		bs_delete(mCurrentCapabilities);
+		mCurrentCapabilities = nullptr;
+    }
+
+	RenderWindowPtr RenderAPICore::initialize(const RENDER_WINDOW_DESC& primaryWindowDesc) // Runs the prepare step, creates the primary window, runs the finalize step, and returns the window.
+	{
+		gCoreThread().queueCommand(std::bind(&RenderAPICore::initializePrepare, this), true); // NOTE(review): the trailing 'true' presumably makes the call block until executed - confirm.
+
+		RENDER_WINDOW_DESC windowDesc = primaryWindowDesc; // Local copy of the caller's descriptor.
+		RenderWindowPtr renderWindow = RenderWindow::create(windowDesc, nullptr);
+
+		gCoreThread().queueCommand(std::bind(&RenderAPICore::initializeFinalize, this, renderWindow->getCore()), true); // The finalize step receives the window's core object.
+
+		return renderWindow;
+	}
+
+	void RenderAPICore::initializePrepare()
+	{
+		// Do nothing
+	}
+
+	void RenderAPICore::initializeFinalize(const SPtr<RenderWindowCore>& primaryWindow) // Resets every per-stage "program bound" flag; primaryWindow is not used in this implementation.
+	{
+		THROW_IF_NOT_CORE_THREAD; // Core-thread guard macro (defined elsewhere).
+
+		mVertexProgramBound = false;
+		mGeometryProgramBound = false;
+		mFragmentProgramBound = false;
+		mDomainProgramBound = false;
+		mHullProgramBound = false;
+		mComputeProgramBound = false;
+	}
+
+	void RenderAPICore::destroy() // Queues destroyCore() on the core accessor and then submits the accessors.
+	{
+		gCoreAccessor().queueCommand(std::bind(&RenderAPICore::destroyCore, this));
+		gCoreThread().submitAccessors(true); // NOTE(review): 'true' presumably requests a blocking submit so destroyCore() has run on return - confirm.
+	}
+
+	/** Drops the reference to the currently active render target. */
+	void RenderAPICore::destroyCore()
+	{
+		mActiveRenderTarget.reset();
+	}
+
+	/** Returns the current capabilities object. This is nullptr until one has been assigned. */
+	const RenderAPICapabilities* RenderAPICore::getCapabilities() const
+	{
+		return mCurrentCapabilities;
+	}
+
+	/** Returns the stored driver version. Core thread only. */
+	const DriverVersion& RenderAPICore::getDriverVersion() const
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		return mDriverVersion;
+	}
+
+    /**
+     * Disables a texture unit by calling setTexture() with the "enabled" flag off and an empty texture pointer.
+     *
+     * @param	gptype	Program stage the texture unit belongs to.
+     * @param	texUnit	Index of the texture unit to disable.
+     */
+    void RenderAPICore::disableTextureUnit(GpuProgramType gptype, UINT16 texUnit)
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		const SPtr<TextureCore> noTexture;
+		setTexture(gptype, texUnit, false, noTexture);
+	}
+
+	/**
+	 * Appends a plane to the stored clip plane list and flags the list as dirty.
+	 *
+	 * @param	p	Plane to append.
+	 */
+	void RenderAPICore::addClipPlane(const Plane &p)
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		mClipPlanes.emplace_back(p);
+		mClipPlanesDirty = true;
+	}
+
+	/**
+	 * Replaces the stored clip plane list. The dirty flag is raised only when the new list differs from the
+	 * stored one.
+	 *
+	 * @param	clipPlanes	New list of clip planes.
+	 */
+	void RenderAPICore::setClipPlanes(const PlaneList& clipPlanes)
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		const bool listChanged = (clipPlanes != mClipPlanes);
+		if (!listChanged)
+			return;
+
+		mClipPlanes = clipPlanes;
+		mClipPlanesDirty = true;
+	}
+
+	/** Removes all stored clip planes. The dirty flag is raised only if there was something to remove. */
+	void RenderAPICore::resetClipPlanes()
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		if (mClipPlanes.empty())
+			return;
+
+		mClipPlanes.clear();
+		mClipPlanesDirty = true;
+	}
+
+	/**
+	 * Records that a program of the provided program's type is now bound. Binding a vertex program when none was
+	 * bound, while clip planes are present, also flags the clip planes as dirty.
+	 *
+	 * @param	prg	Program being bound. Dereferenced unconditionally, so it must not be null.
+	 */
+	void RenderAPICore::bindGpuProgram(const SPtr<GpuProgramCore>& prg)
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		const auto programType = prg->getProperties().getType();
+		switch(programType)
+		{
+		case GPT_VERTEX_PROGRAM:
+			// Only a transition from unbound to bound affects the clip planes
+			if (!(mVertexProgramBound || mClipPlanes.empty()))
+				mClipPlanesDirty = true;
+
+			mVertexProgramBound = true;
+			break;
+		case GPT_GEOMETRY_PROGRAM:	mGeometryProgramBound = true;	break;
+		case GPT_FRAGMENT_PROGRAM:	mFragmentProgramBound = true;	break;
+		case GPT_DOMAIN_PROGRAM:	mDomainProgramBound = true;		break;
+		case GPT_HULL_PROGRAM:		mHullProgramBound = true;		break;
+		case GPT_COMPUTE_PROGRAM:	mComputeProgramBound = true;	break;
+		}
+	}
+
+	/**
+	 * Records that no program of the provided type is bound any more. Unbinding a vertex program that was bound,
+	 * while clip planes are present, also flags the clip planes as dirty.
+	 *
+	 * @param	gptype	Program stage being unbound.
+	 */
+	void RenderAPICore::unbindGpuProgram(GpuProgramType gptype)
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		switch(gptype)
+		{
+		case GPT_VERTEX_PROGRAM:
+			// Only a transition from bound to unbound affects the clip planes
+			if (!(!mVertexProgramBound || mClipPlanes.empty()))
+				mClipPlanesDirty = true;
+
+			mVertexProgramBound = false;
+			break;
+		case GPT_GEOMETRY_PROGRAM:	mGeometryProgramBound = false;	break;
+		case GPT_FRAGMENT_PROGRAM:	mFragmentProgramBound = false;	break;
+		case GPT_DOMAIN_PROGRAM:	mDomainProgramBound = false;	break;
+		case GPT_HULL_PROGRAM:		mHullProgramBound = false;		break;
+		case GPT_COMPUTE_PROGRAM:	mComputeProgramBound = false;	break;
+		}
+	}
+
+	/**
+	 * Reports whether a program is recorded as bound for the provided stage.
+	 *
+	 * @param	gptype	Program stage to query.
+	 * @return			The stored bound flag for the stage, or false for a type not handled by the switch.
+	 */
+	bool RenderAPICore::isGpuProgramBound(GpuProgramType gptype)
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		bool isBound = false;
+		switch(gptype)
+		{
+		case GPT_VERTEX_PROGRAM:	isBound = mVertexProgramBound;		break;
+		case GPT_GEOMETRY_PROGRAM:	isBound = mGeometryProgramBound;	break;
+		case GPT_FRAGMENT_PROGRAM:	isBound = mFragmentProgramBound;	break;
+		case GPT_DOMAIN_PROGRAM:	isBound = mDomainProgramBound;		break;
+		case GPT_HULL_PROGRAM:		isBound = mHullProgramBound;		break;
+		case GPT_COMPUTE_PROGRAM:	isBound = mComputeProgramBound;		break;
+		}
+
+		return isBound;
+	}
+
+	/**
+	 * Binds all samplers, textures and load-store textures described by the parameter object to the provided
+	 * program stage, then forwards the parameters to setConstantBuffers().
+	 *
+	 * @param	gptype	Program stage to bind the parameters to.
+	 * @param	params	Parameters to bind. Dereferenced unconditionally, so it must not be null.
+	 */
+	void RenderAPICore::setGpuParams(GpuProgramType gptype, const SPtr<GpuParamsCore>& params)
+	{
+		const GpuParamDesc& paramDesc = params->getParamDesc();
+
+		for (auto& samplerEntry : paramDesc.samplers)
+		{
+			const auto slot = samplerEntry.second.slot;
+			SPtr<SamplerStateCore> samplerState = params->getSamplerState(slot);
+
+			// Slots without an assigned state get the default sampler state
+			if (samplerState != nullptr)
+				setSamplerState(gptype, slot, samplerState);
+			else
+				setSamplerState(gptype, slot, SamplerStateCore::getDefault());
+		}
+
+		for (auto& textureEntry : paramDesc.textures)
+		{
+			const auto slot = textureEntry.second.slot;
+			SPtr<TextureCore> texture = params->getTexture(slot);
+
+			// A missing texture is bound as a null pointer with the "enabled" flag off
+			const bool hasTexture = (texture != nullptr);
+
+			if (params->isLoadStoreTexture(slot))
+			{
+				const TextureSurface& surface = params->getLoadStoreSurface(slot);
+				setLoadStoreTexture(gptype, slot, hasTexture, texture, surface);
+			}
+			else
+				setTexture(gptype, slot, hasTexture, texture);
+		}
+
+		setConstantBuffers(gptype, params);
+	}
+
+	/**
+	 * Swaps the buffers of the provided render target and increments the present counter.
+	 *
+	 * @param	target	Render target to swap. Dereferenced unconditionally, so it must not be null.
+	 */
+	void RenderAPICore::swapBuffers(const SPtr<RenderTargetCore>& target)
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		RenderTargetCore& renderTarget = *target;
+		renderTarget.swapBuffers();
+
+		BS_INC_RENDER_STAT(NumPresents);
+	}
 }

+ 1143 - 1142
BansheeD3D11RenderAPI/Source/BsD3D11RenderAPI.cpp

@@ -1,1143 +1,1144 @@
-#include "BsD3D11RenderAPI.h"
-#include "BsD3D11DriverList.h"
-#include "BsD3D11Driver.h"
-#include "BsD3D11Device.h"
-#include "BsD3D11TextureManager.h"
-#include "BsD3D11Texture.h"
-#include "BsD3D11HardwareBufferManager.h"
-#include "BsD3D11RenderWindowManager.h"
-#include "BsD3D11HLSLProgramFactory.h"
-#include "BsD3D11BlendState.h"
-#include "BsD3D11RasterizerState.h"
-#include "BsD3D11DepthStencilState.h"
-#include "BsD3D11SamplerState.h"
-#include "BsD3D11GpuProgram.h"
-#include "BsD3D11Mappings.h"
-#include "BsD3D11VertexBuffer.h"
-#include "BsD3D11IndexBuffer.h"
-#include "BsD3D11RenderStateManager.h"
-#include "BsD3D11GpuParamBlockBuffer.h"
-#include "BsD3D11InputLayoutManager.h"
-#include "BsD3D11TextureView.h"
-#include "BsD3D11RenderUtility.h"
-#include "BsGpuParams.h"
-#include "BsCoreThread.h"
-#include "BsD3D11QueryManager.h"
-#include "BsDebug.h"
-#include "BsException.h"
-#include "BsRenderStats.h"
-
-namespace BansheeEngine
-{
-	/** Resets all D3D11 object pointers and sets the default draw operation and a full-size normalized viewport. */
-	D3D11RenderAPI::D3D11RenderAPI()
-		: mDXGIFactory(nullptr)
-		, mDevice(nullptr)
-		, mDriverList(nullptr)
-		, mActiveD3DDriver(nullptr)
-		, mFeatureLevel(D3D_FEATURE_LEVEL_11_0)
-		, mHLSLFactory(nullptr)
-		, mIAManager(nullptr)
-		, mStencilRef(0)
-		, mActiveDrawOp(DOT_TRIANGLE_LIST)
-		, mViewportNorm(0.0f, 0.0f, 1.0f, 1.0f)
-	{
-		// Clip planes are handled through shaders in DX11, so they never need to be flagged dirty here
-		mClipPlanesDirty = false;
-	}
-
-	/** Empty. The objects created during initialization are released in destroyCore(). */
-	D3D11RenderAPI::~D3D11RenderAPI()
-	{ }
-
-	/** Returns the identifier of this render API ("D3D11RenderAPI"). */
-	const StringID& D3D11RenderAPI::getName() const
-	{
-		static StringID apiName("D3D11RenderAPI");
-
-		return apiName;
-	}
-
-	/** Returns the name of the shading language used by this render API ("hlsl"). */
-	const String& D3D11RenderAPI::getShadingLanguageName() const
-	{
-		static String languageName("hlsl");
-
-		return languageName;
-	}
-
-	/**
-	 * First initialization step. Creates the DXGI factory, picks a driver, creates the D3D11 device, starts the
-	 * texture, hardware buffer, render window and render state managers, builds the capabilities object and
-	 * registers the HLSL program factory. Core thread only.
-	 *
-	 * Raises RenderingAPIException if the DXGI factory or the device cannot be created.
-	 */
-	void D3D11RenderAPI::initializePrepare()
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		HRESULT hr = CreateDXGIFactory(__uuidof(IDXGIFactory), (void**)&mDXGIFactory);
-		if(FAILED(hr))
-			BS_EXCEPT(RenderingAPIException, "Failed to create Direct3D11 DXGIFactory");
-
-		mDriverList = bs_new<D3D11DriverList>(mDXGIFactory);
-		mActiveD3DDriver = mDriverList->item(0); // TODO: Always get first driver, for now
-		mVideoModeInfo = mActiveD3DDriver->getVideoModeInfo();
-
-		IDXGIAdapter* selectedAdapter = mActiveD3DDriver->getDeviceAdapter();
-
-		// Feature levels are listed from highest to lowest. The level actually obtained is written to mFeatureLevel.
-		D3D_FEATURE_LEVEL requestedLevels[] = {
-			D3D_FEATURE_LEVEL_11_0,
-			D3D_FEATURE_LEVEL_10_1,
-			D3D_FEATURE_LEVEL_10_0,
-			D3D_FEATURE_LEVEL_9_3,
-			D3D_FEATURE_LEVEL_9_2,
-			D3D_FEATURE_LEVEL_9_1
-		};
-
-		UINT32 numRequestedLevel = sizeof(requestedLevels) / sizeof(requestedLevels[0]);
-
-		UINT32 deviceFlags = 0;
-
-		// The debug layer is requested only in debug builds
-#if BS_DEBUG_MODE
-		deviceFlags |= D3D11_CREATE_DEVICE_DEBUG;
-#endif
-
-		// An explicit adapter is passed, which is why the driver type is D3D_DRIVER_TYPE_UNKNOWN
-		ID3D11Device* device;
-		hr = D3D11CreateDevice(selectedAdapter, D3D_DRIVER_TYPE_UNKNOWN, nullptr, deviceFlags, 
-			requestedLevels, numRequestedLevel, D3D11_SDK_VERSION, &device, &mFeatureLevel, nullptr);
-
-		if(FAILED(hr))         
-			BS_EXCEPT(RenderingAPIException, "Failed to create Direct3D11 object. D3D11CreateDeviceN returned this error code: " + toString(hr));
-
-		mDevice = bs_new<D3D11Device>(device);
-		
-		// This must query for DirectX 10 interface as this is unsupported for DX11
-		// The 64-bit version value is split into four 16-bit parts: major, minor, release and build
-		LARGE_INTEGER driverVersion; 
-		if(SUCCEEDED(selectedAdapter->CheckInterfaceSupport(IID_ID3D10Device, &driverVersion)))
-		{
-			mDriverVersion.major =  HIWORD(driverVersion.HighPart);
-			mDriverVersion.minor = LOWORD(driverVersion.HighPart);
-			mDriverVersion.release = HIWORD(driverVersion.LowPart);
-			mDriverVersion.build = LOWORD(driverVersion.LowPart);
-		}
-
-		// Create the texture manager for use by others
-		TextureManager::startUp<D3D11TextureManager>();
-		TextureCoreManager::startUp<D3D11TextureCoreManager>();
-
-		// Create hardware buffer manager
-		HardwareBufferManager::startUp();
-		HardwareBufferCoreManager::startUp<D3D11HardwareBufferCoreManager>(std::ref(*mDevice));
-
-		// Create render window manager
-		RenderWindowManager::startUp<D3D11RenderWindowManager>(this);
-		RenderWindowCoreManager::startUp<D3D11RenderWindowCoreManager>(this);
-
-		// Create & register HLSL factory
-		mHLSLFactory = bs_new<D3D11HLSLProgramFactory>();
-
-		// Create render state manager
-		RenderStateCoreManager::startUp<D3D11RenderStateCoreManager>();
-
-		// Capabilities are built after the device exists because they depend on mFeatureLevel and the active driver
-		mCurrentCapabilities = createRenderSystemCapabilities();
-
-		mCurrentCapabilities->addShaderProfile("hlsl");
-		GpuProgramCoreManager::instance().addFactory(mHLSLFactory);
-
-		mIAManager = bs_new<D3D11InputLayoutManager>();
-
-		RenderAPICore::initializePrepare();
-	}
-
-	/**
-	 * Second initialization step. Starts the D3D11 render utility and the query manager, then runs the base class
-	 * finalization.
-	 *
-	 * @param	primaryWindow	Core object of the primary render window, forwarded to the base class.
-	 */
-	void D3D11RenderAPI::initializeFinalize(const SPtr<RenderWindowCore>& primaryWindow)
-	{
-		D3D11RenderUtility::startUp(mDevice);
-
-		QueryManager::startUp<D3D11QueryManager>();
-
-		RenderAPICore::initializeFinalize(primaryWindow);
-	}
-
-    /**
-     * Releases everything created during initialization: bound UAV views, the query manager and render utility,
-     * the input layout manager, the HLSL factory, the resource managers, the DXGI factory, the device and the
-     * driver list. Finishes with the base class cleanup. Core thread only.
-     */
-    void D3D11RenderAPI::destroyCore()
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		// Release the texture views still held for bound unordered-access slots
-		for (auto& boundUAV : mBoundUAVs)
-		{
-			if (boundUAV.second != nullptr)
-				boundUAV.first->releaseView(boundUAV.second);
-		}
-
-		QueryManager::shutDown();
-		D3D11RenderUtility::shutDown();
-
-		if(mIAManager != nullptr)
-		{
-			bs_delete(mIAManager);
-			mIAManager = nullptr;
-		}
-
-		if(mHLSLFactory != nullptr)
-		{
-			bs_delete(mHLSLFactory);
-			mHLSLFactory = nullptr;
-		}
-
-		// Drop references to resources before their managers are shut down
-		mActiveVertexDeclaration = nullptr;
-		mActiveVertexShader = nullptr;
-		mActiveRenderTarget = nullptr;
-
-		// Managers are shut down in the reverse of the order initializePrepare() started them in
-		RenderStateCoreManager::shutDown();
-		RenderWindowCoreManager::shutDown();
-		RenderWindowManager::shutDown();
-		HardwareBufferCoreManager::shutDown();
-		HardwareBufferManager::shutDown();
-		TextureCoreManager::shutDown();
-		TextureManager::shutDown();
-
-		SAFE_RELEASE(mDXGIFactory);
-
-		if(mDevice != nullptr)
-		{
-			bs_delete(mDevice);
-			mDevice = nullptr;
-		}
-
-		if(mDriverList != nullptr)
-		{
-			bs_delete(mDriverList);
-			mDriverList = nullptr;
-		}
-
-		// NOTE(review): presumably owned by the driver list deleted above, hence only the pointer is cleared - confirm
-		mActiveD3DDriver = nullptr;
-
-		RenderAPICore::destroyCore();
-	}
-
-	/**
-	 * Binds a sampler state to a single sampler slot of the provided program stage.
-	 *
-	 * @param	gptype			Program stage to bind the sampler to. Raises InvalidParametersException if unsupported.
-	 * @param	texUnit			Index of the sampler slot.
-	 * @param	samplerState	State to bind. Dereferenced unconditionally, so it must not be null.
-	 */
-	void D3D11RenderAPI::setSamplerState(GpuProgramType gptype, UINT16 texUnit, const SPtr<SamplerStateCore>& samplerState)
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		// TODO - Samplers are bound one by one, it might be more efficient to collect them in an array
-		//  and bind them all at once before rendering? Needs testing
-
-		D3D11SamplerStateCore* d3d11Sampler = static_cast<D3D11SamplerStateCore*>(samplerState.get());
-		ID3D11SamplerState* samplers[1] = { d3d11Sampler->getInternal() };
-
-		auto context = mDevice->getImmediateContext();
-		switch(gptype)
-		{
-		case GPT_VERTEX_PROGRAM:
-			context->VSSetSamplers(texUnit, 1, samplers);
-			break;
-		case GPT_FRAGMENT_PROGRAM:
-			context->PSSetSamplers(texUnit, 1, samplers);
-			break;
-		case GPT_GEOMETRY_PROGRAM:
-			context->GSSetSamplers(texUnit, 1, samplers);
-			break;
-		case GPT_DOMAIN_PROGRAM:
-			context->DSSetSamplers(texUnit, 1, samplers);
-			break;
-		case GPT_HULL_PROGRAM:
-			context->HSSetSamplers(texUnit, 1, samplers);
-			break;
-		case GPT_COMPUTE_PROGRAM:
-			context->CSSetSamplers(texUnit, 1, samplers);
-			break;
-		default:
-			BS_EXCEPT(InvalidParametersException, "Unsupported gpu program type: " + toString(gptype));
-		}
-
-		BS_INC_RENDER_STAT(NumSamplerBinds);
-	}
-
-	/**
-	 * Binds a blend state to the output merger, with no blend factor and a sample mask of all ones.
-	 *
-	 * @param	blendState	State to bind. Dereferenced unconditionally, so it must not be null.
-	 */
-	void D3D11RenderAPI::setBlendState(const SPtr<BlendStateCore>& blendState)
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		D3D11BlendStateCore* d3d11State = static_cast<D3D11BlendStateCore*>(blendState.get());
-
-		auto context = mDevice->getImmediateContext();
-		context->OMSetBlendState(d3d11State->getInternal(), nullptr, 0xFFFFFFFF);
-
-		BS_INC_RENDER_STAT(NumBlendStateChanges);
-	}
-
-	/**
-	 * Binds a rasterizer state to the rasterizer stage.
-	 *
-	 * @param	rasterizerState	State to bind. Dereferenced unconditionally, so it must not be null.
-	 */
-	void D3D11RenderAPI::setRasterizerState(const SPtr<RasterizerStateCore>& rasterizerState)
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		D3D11RasterizerStateCore* d3d11State = static_cast<D3D11RasterizerStateCore*>(rasterizerState.get());
-
-		auto context = mDevice->getImmediateContext();
-		context->RSSetState(d3d11State->getInternal());
-
-		BS_INC_RENDER_STAT(NumRasterizerStateChanges);
-	}
-
-	/**
-	 * Binds a depth-stencil state to the output merger.
-	 *
-	 * @param	depthStencilState	State to bind. Dereferenced unconditionally, so it must not be null.
-	 * @param	stencilRefValue		Stencil reference value passed along with the state.
-	 */
-	void D3D11RenderAPI::setDepthStencilState(const SPtr<DepthStencilStateCore>& depthStencilState, UINT32 stencilRefValue)
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		D3D11DepthStencilStateCore* d3d11State = static_cast<D3D11DepthStencilStateCore*>(depthStencilState.get());
-
-		auto context = mDevice->getImmediateContext();
-		context->OMSetDepthStencilState(d3d11State->getInternal(), stencilRefValue);
-
-		BS_INC_RENDER_STAT(NumDepthStencilStateChanges);
-	}
-
-	/**
-	 * Binds a texture's shader resource view to a single resource slot of the provided program stage. A null view
-	 * is bound when the texture is null or "enabled" is false.
-	 *
-	 * @param	gptype	Program stage to bind to. Raises InvalidParametersException if unsupported.
-	 * @param	unit	Index of the resource slot.
-	 * @param	enabled	When false the slot is cleared regardless of the provided texture.
-	 * @param	texPtr	Texture to bind, may be null.
-	 */
-	void D3D11RenderAPI::setTexture(GpuProgramType gptype, UINT16 unit, bool enabled, const SPtr<TextureCore>& texPtr)
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		// TODO - Views are bound one by one, it might be more efficient to collect them in an array
-		//  and bind them all at once before rendering? Needs testing
-
-		ID3D11ShaderResourceView* views[1] = { nullptr };
-		if(enabled && texPtr != nullptr)
-			views[0] = static_cast<D3D11TextureCore*>(texPtr.get())->getSRV();
-
-		auto context = mDevice->getImmediateContext();
-		switch(gptype)
-		{
-		case GPT_VERTEX_PROGRAM:
-			context->VSSetShaderResources(unit, 1, views);
-			break;
-		case GPT_FRAGMENT_PROGRAM:
-			context->PSSetShaderResources(unit, 1, views);
-			break;
-		case GPT_GEOMETRY_PROGRAM:
-			context->GSSetShaderResources(unit, 1, views);
-			break;
-		case GPT_DOMAIN_PROGRAM:
-			context->DSSetShaderResources(unit, 1, views);
-			break;
-		case GPT_HULL_PROGRAM:
-			context->HSSetShaderResources(unit, 1, views);
-			break;
-		case GPT_COMPUTE_PROGRAM:
-			context->CSSetShaderResources(unit, 1, views);
-			break;
-		default:
-			BS_EXCEPT(InvalidParametersException, "Unsupported gpu program type: " + toString(gptype));
-		}
-
-		BS_INC_RENDER_STAT(NumTextureBinds);
-	}
-
-	/**
-	 * Binds an unordered access view of a texture surface to a UAV slot of the fragment or compute stage. A null
-	 * view is bound when the texture is null or "enabled" is false. The view previously tracked for the slot is
-	 * released and replaced by the new one.
-	 *
-	 * @param	gptype	Program stage to bind to. Only fragment and compute programs are supported, anything else
-	 *					raises InvalidParametersException.
-	 * @param	unit	Index of the UAV slot.
-	 * @param	enabled	When false the slot is cleared regardless of the provided texture.
-	 * @param	texPtr	Texture to bind, may be null.
-	 * @param	surface	Mip level and array slice range the view is requested for.
-	 */
-	void D3D11RenderAPI::setLoadStoreTexture(GpuProgramType gptype, UINT16 unit, bool enabled, const SPtr<TextureCore>& texPtr,
-		const TextureSurface& surface)
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		// TODO - This hasn't been tested and might be incorrect. I might need to set UAVs together with render targets,
-		// especially considering DX11 expects number of UAVs to match number of render targets.
-
-		ID3D11UnorderedAccessView* viewArray[1] = { nullptr };
-		std::pair<SPtr<TextureCore>, TextureViewPtr> newBinding;
-
-		if (texPtr != nullptr && enabled)
-		{
-			TextureViewPtr texView = TextureCore::requestView(texPtr, surface.mipLevel, 1, 
-				surface.arraySlice, surface.numArraySlices, GVU_RANDOMWRITE);
-
-			D3D11TextureView* d3d11texView = static_cast<D3D11TextureView*>(texView.get());
-			viewArray[0] = d3d11texView->getUAV();
-
-			newBinding = std::make_pair(texPtr, texView);
-		}
-
-		// The new view is requested before the old one is released, same as the order used previously
-		if (mBoundUAVs[unit].second != nullptr)
-			mBoundUAVs[unit].first->releaseView(mBoundUAVs[unit].second);
-
-		mBoundUAVs[unit] = newBinding;
-
-		if (gptype == GPT_FRAGMENT_PROGRAM)
-		{
-			// Leaves the currently bound render targets and depth-stencil untouched
-			mDevice->getImmediateContext()->OMSetRenderTargetsAndUnorderedAccessViews(
-				D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL, nullptr, nullptr, unit, 1, viewArray, nullptr);
-		}
-		else if (gptype == GPT_COMPUTE_PROGRAM)
-		{
-			mDevice->getImmediateContext()->CSSetUnorderedAccessViews(unit, 1, viewArray, nullptr);
-		}
-		else
-			BS_EXCEPT(InvalidParametersException, "Unsupported gpu program type: " + toString(gptype));
-
-		BS_INC_RENDER_STAT(NumTextureBinds);
-	}
-
-	/**
-	 * Clears a texture slot by calling setTexture() with the "enabled" flag off and no texture.
-	 *
-	 * @param	gptype	Program stage the slot belongs to.
-	 * @param	texUnit	Index of the slot to clear.
-	 */
-	void D3D11RenderAPI::disableTextureUnit(GpuProgramType gptype, UINT16 texUnit)
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		const SPtr<TextureCore> noTexture;
-		setTexture(gptype, texUnit, false, noTexture);
-	}
-
-	/** Does nothing in this render API. */
-	void D3D11RenderAPI::beginFrame()
-	{
-		// Unused
-	}
-
-	/** Does nothing in this render API. */
-	void D3D11RenderAPI::endFrame()
-	{
-		// Unused
-	}
-
-	/**
-	 * Stores the provided viewport rectangle and applies it to the active render target, if there is one.
-	 *
-	 * @param	vp	Viewport rectangle. applyViewport() multiplies it by the render target size, so it is treated as
-	 *				a fraction of the target's dimensions.
-	 */
-	void D3D11RenderAPI::setViewport(const Rect2& vp)
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		mViewportNorm = vp;
-		applyViewport();
-	}
-
-	/**
-	 * Converts the stored normalized viewport into pixel dimensions of the active render target and binds it to
-	 * the rasterizer stage. Does nothing when no render target is active.
-	 */
-	void D3D11RenderAPI::applyViewport()
-	{
-		if (mActiveRenderTarget == nullptr)
-			return;
-
-		const RenderTargetProperties& rtProps = mActiveRenderTarget->getProperties();
-		const auto targetWidth = rtProps.getWidth();
-		const auto targetHeight = rtProps.getHeight();
-
-		// Scale the normalized rectangle by the target size
-		mViewport.TopLeftX = (FLOAT)(targetWidth * mViewportNorm.x);
-		mViewport.TopLeftY = (FLOAT)(targetHeight * mViewportNorm.y);
-		mViewport.Width = (FLOAT)(targetWidth * mViewportNorm.width);
-		mViewport.Height = (FLOAT)(targetHeight * mViewportNorm.height);
-
-		// Targets that require flipping measure the vertical origin from the bottom instead of the top
-		if (rtProps.requiresTextureFlipping())
-			mViewport.TopLeftY = targetHeight - mViewport.Height - mViewport.TopLeftY;
-
-		mViewport.MinDepth = 0.0f;
-		mViewport.MaxDepth = 1.0f;
-
-		mDevice->getImmediateContext()->RSSetViewports(1, &mViewport);
-	}
-
-	/**
-	 * Binds a contiguous range of vertex buffers to the input assembler, starting at the provided slot. Each
-	 * buffer is bound with its own vertex size as the stride and a zero offset.
-	 *
-	 * @param	index		First slot to bind to.
-	 * @param	buffers		Array of "numBuffers" vertex buffers.
-	 * @param	numBuffers	Number of buffers to bind. Slots [index, index + numBuffers) must all be within the
-	 *						maximum reported by the capabilities, otherwise InvalidParametersException is raised.
-	 */
-	void D3D11RenderAPI::setVertexBuffers(UINT32 index, SPtr<VertexBufferCore>* buffers, UINT32 numBuffers)
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		// The last slot written is (index + numBuffers - 1), so a range ending exactly at the maximum is still valid.
-		// "index" is unsigned, so there is no lower bound to check.
-		UINT32 maxBoundVertexBuffers = mCurrentCapabilities->getMaxBoundVertexBuffers();
-		if((index + numBuffers) > maxBoundVertexBuffers)
-			BS_EXCEPT(InvalidParametersException, "Invalid vertex index: " + toString(index) + ". Valid range is 0 .. " + toString(maxBoundVertexBuffers - 1));
-
-		// NOTE(review): assumes the capability maximum never exceeds MAX_BOUND_VERTEX_BUFFERS - confirm
-		ID3D11Buffer* dx11buffers[MAX_BOUND_VERTEX_BUFFERS];
-		UINT32 strides[MAX_BOUND_VERTEX_BUFFERS];
-		UINT32 offsets[MAX_BOUND_VERTEX_BUFFERS];
-
-		for(UINT32 i = 0; i < numBuffers; i++)
-		{
-			SPtr<D3D11VertexBufferCore> vertexBuffer = std::static_pointer_cast<D3D11VertexBufferCore>(buffers[i]);
-			const VertexBufferProperties& vbProps = vertexBuffer->getProperties();
-
-			dx11buffers[i] = vertexBuffer->getD3DVertexBuffer();
-
-			strides[i] = vbProps.getVertexSize();
-			offsets[i] = 0;
-		}
-
-		mDevice->getImmediateContext()->IASetVertexBuffers(index, numBuffers, dx11buffers, strides, offsets);
-
-		BS_INC_RENDER_STAT(NumVertexBufferBinds);
-	}
-
-	/**
-	 * Binds an index buffer to the input assembler, using a 16-bit or 32-bit format depending on the buffer's
-	 * index type.
-	 *
-	 * @param	buffer	Index buffer to bind. Dereferenced unconditionally, so it must not be null.
-	 */
-	void D3D11RenderAPI::setIndexBuffer(const SPtr<IndexBufferCore>& buffer)
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		D3D11IndexBufferCore* d3d11Buffer = static_cast<D3D11IndexBufferCore*>(buffer.get());
-		const auto indexType = d3d11Buffer->getProperties().getType();
-
-		DXGI_FORMAT indexFormat = DXGI_FORMAT_R16_UINT;
-		if(indexType == IT_32BIT)
-			indexFormat = DXGI_FORMAT_R32_UINT;
-		else if(indexType != IT_16BIT)
-			BS_EXCEPT(InternalErrorException, "Unsupported index format: " + toString(indexType));
-
-		mDevice->getImmediateContext()->IASetIndexBuffer(d3d11Buffer->getD3DIndexBuffer(), indexFormat, 0);
-
-		BS_INC_RENDER_STAT(NumIndexBufferBinds);
-	}
-
-	/**
-	 * Stores the vertex declaration to use for subsequent draws. Nothing is bound on the device here.
-	 *
-	 * @param	vertexDeclaration	Declaration to remember as the active one.
-	 */
-	void D3D11RenderAPI::setVertexDeclaration(const SPtr<VertexDeclarationCore>& vertexDeclaration)
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		mActiveVertexDeclaration = vertexDeclaration;
-	}
-
-	/**
-	 * Sets the primitive topology used by subsequent draw calls and remembers it as the active draw operation.
-	 *
-	 * @param	op	Draw operation to make active.
-	 */
-	void D3D11RenderAPI::setDrawOperation(DrawOperationType op)
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		mDevice->getImmediateContext()->IASetPrimitiveTopology(D3D11Mappings::getPrimitiveType(op));
-
-		// draw() and drawIndexed() read this to compute primitive counts for the render statistics
-		mActiveDrawOp = op;
-	}
-
-	/**
-	 * Binds a GPU program to the pipeline stage matching its type. A bound vertex program is also remembered as
-	 * the active vertex shader.
-	 *
-	 * Raises InvalidParametersException for an unsupported program type and RenderingAPIException if the device
-	 * reports an error afterwards.
-	 *
-	 * @param	prg	Program to bind. Dereferenced unconditionally, so it must not be null.
-	 */
-	void D3D11RenderAPI::bindGpuProgram(const SPtr<GpuProgramCore>& prg)
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		auto context = mDevice->getImmediateContext();
-		const auto programType = prg->getProperties().getType();
-
-		switch(programType)
-		{
-		case GPT_VERTEX_PROGRAM:
-			context->VSSetShader(static_cast<D3D11GpuVertexProgramCore*>(prg.get())->getVertexShader(), nullptr, 0);
-			mActiveVertexShader = std::static_pointer_cast<D3D11GpuProgramCore>(prg);
-			break;
-		case GPT_FRAGMENT_PROGRAM:
-			context->PSSetShader(static_cast<D3D11GpuFragmentProgramCore*>(prg.get())->getPixelShader(), nullptr, 0);
-			break;
-		case GPT_GEOMETRY_PROGRAM:
-			context->GSSetShader(static_cast<D3D11GpuGeometryProgramCore*>(prg.get())->getGeometryShader(), nullptr, 0);
-			break;
-		case GPT_DOMAIN_PROGRAM:
-			context->DSSetShader(static_cast<D3D11GpuDomainProgramCore*>(prg.get())->getDomainShader(), nullptr, 0);
-			break;
-		case GPT_HULL_PROGRAM:
-			context->HSSetShader(static_cast<D3D11GpuHullProgramCore*>(prg.get())->getHullShader(), nullptr, 0);
-			break;
-		case GPT_COMPUTE_PROGRAM:
-			context->CSSetShader(static_cast<D3D11GpuComputeProgramCore*>(prg.get())->getComputeShader(), nullptr, 0);
-			break;
-		default:
-			BS_EXCEPT(InvalidParametersException, "Unsupported gpu program type: " + toString(programType));
-		}
-
-		if (mDevice->hasError())
-			BS_EXCEPT(RenderingAPIException, "Failed to bindGpuProgram : " + mDevice->getErrorDescription());
-
-		BS_INC_RENDER_STAT(NumGpuProgramBinds);
-	}
-
-	/**
-	 * Clears the shader bound to the provided pipeline stage. Unbinding the vertex stage also clears the
-	 * remembered active vertex shader.
-	 *
-	 * @param	gptype	Program stage to clear. Raises InvalidParametersException if unsupported.
-	 */
-	void D3D11RenderAPI::unbindGpuProgram(GpuProgramType gptype)
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		auto context = mDevice->getImmediateContext();
-		switch(gptype)
-		{
-		case GPT_VERTEX_PROGRAM:
-			context->VSSetShader(nullptr, nullptr, 0);
-			mActiveVertexShader = nullptr;
-			break;
-		case GPT_FRAGMENT_PROGRAM:
-			context->PSSetShader(nullptr, nullptr, 0);
-			break;
-		case GPT_GEOMETRY_PROGRAM:
-			context->GSSetShader(nullptr, nullptr, 0);
-			break;
-		case GPT_DOMAIN_PROGRAM:
-			context->DSSetShader(nullptr, nullptr, 0);
-			break;
-		case GPT_HULL_PROGRAM:
-			context->HSSetShader(nullptr, nullptr, 0);
-			break;
-		case GPT_COMPUTE_PROGRAM:
-			context->CSSetShader(nullptr, nullptr, 0);
-			break;
-		default:
-			BS_EXCEPT(InvalidParametersException, "Unsupported gpu program type: " + toString(gptype));
-		}
-
-		// Unbinding is counted with the same statistic as binding
-		BS_INC_RENDER_STAT(NumGpuProgramBinds);
-	}
-
-	/**
-	 * Updates the hardware buffers of the parameter object, then binds the buffer of every parameter block to
-	 * its slot on the provided program stage. Blocks without a buffer get a null buffer bound.
-	 *
-	 * Raises RenderingAPIException if the device reports an error afterwards.
-	 *
-	 * @param	gptype			Program stage to bind to. Unrecognized types bind nothing.
-	 * @param	bindableParams	Parameters to bind. Dereferenced unconditionally, so it must not be null.
-	 */
-	void D3D11RenderAPI::setConstantBuffers(GpuProgramType gptype, const SPtr<GpuParamsCore>& bindableParams)
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		bindableParams->updateHardwareBuffers();
-		const GpuParamDesc& paramDesc = bindableParams->getParamDesc();
-
-		auto context = mDevice->getImmediateContext();
-
-		// TODO - Constant buffers are bound one by one but it might be more efficient to bind them all at once?
-		for(auto& blockEntry : paramDesc.paramBlocks)
-		{
-			const auto slot = blockEntry.second.slot;
-			SPtr<GpuParamBlockBufferCore> blockBuffer = bindableParams->getParamBlockBuffer(slot);
-
-			ID3D11Buffer* buffers[1] = { nullptr };
-			if(blockBuffer != nullptr)
-				buffers[0] = static_cast<const D3D11GpuParamBlockBufferCore*>(blockBuffer.get())->getD3D11Buffer();
-
-			switch(gptype)
-			{
-			case GPT_VERTEX_PROGRAM:
-				context->VSSetConstantBuffers(slot, 1, buffers);
-				break;
-			case GPT_FRAGMENT_PROGRAM:
-				context->PSSetConstantBuffers(slot, 1, buffers);
-				break;
-			case GPT_GEOMETRY_PROGRAM:
-				context->GSSetConstantBuffers(slot, 1, buffers);
-				break;
-			case GPT_HULL_PROGRAM:
-				context->HSSetConstantBuffers(slot, 1, buffers);
-				break;
-			case GPT_DOMAIN_PROGRAM:
-				context->DSSetConstantBuffers(slot, 1, buffers);
-				break;
-			case GPT_COMPUTE_PROGRAM:
-				context->CSSetConstantBuffers(slot, 1, buffers);
-				break;
-			}
-
-			// Counted once per parameter block, even when the type matched no case above
-			BS_INC_RENDER_STAT(NumGpuParamBufferBinds);
-		}
-
-		if (mDevice->hasError())
-			BS_EXCEPT(RenderingAPIException, "Failed to setConstantBuffers : " + mDevice->getErrorDescription());
-	}
-
-	/**
-	 * Applies the input layout and issues a non-indexed draw call, then updates the draw call, vertex and
-	 * primitive statistics. In debug builds device errors are logged as warnings.
-	 *
-	 * @param	vertexOffset	Index of the first vertex to draw.
-	 * @param	vertexCount		Number of vertices to draw.
-	 */
-	void D3D11RenderAPI::draw(UINT32 vertexOffset, UINT32 vertexCount)
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		applyInputLayout();
-
-		auto context = mDevice->getImmediateContext();
-		context->Draw(vertexCount, vertexOffset);
-
-#if BS_DEBUG_MODE
-		if(mDevice->hasError())
-			LOGWRN(mDevice->getErrorDescription());
-#endif
-
-		const UINT32 numPrimitives = vertexCountToPrimCount(mActiveDrawOp, vertexCount);
-
-		BS_INC_RENDER_STAT(NumDrawCalls);
-		BS_ADD_RENDER_STAT(NumVertices, vertexCount);
-		BS_ADD_RENDER_STAT(NumPrimitives, numPrimitives);
-	}
-
-	/**
-	 * Applies the input layout and issues an indexed draw call, then updates the draw call, vertex and primitive
-	 * statistics. In debug builds device errors are logged as warnings.
-	 *
-	 * @param	startIndex		Index of the first index to read from the index buffer.
-	 * @param	indexCount		Number of indices to draw.
-	 * @param	vertexOffset	Value added to each index before it is used to read a vertex.
-	 * @param	vertexCount		Number of vertices referenced by the draw. Only used for statistics.
-	 */
-	void D3D11RenderAPI::drawIndexed(UINT32 startIndex, UINT32 indexCount, UINT32 vertexOffset, UINT32 vertexCount)
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		applyInputLayout();
-
-		mDevice->getImmediateContext()->DrawIndexed(indexCount, startIndex, vertexOffset);
-
-#if BS_DEBUG_MODE
-		if(mDevice->hasError())
-			LOGWRN(mDevice->getErrorDescription());
-#endif
-
-		// With indexed drawing the number of primitives is determined by the number of indices, not by the number
-		// of vertices they refer to
-		UINT32 primCount = vertexCountToPrimCount(mActiveDrawOp, indexCount);
-
-		BS_INC_RENDER_STAT(NumDrawCalls);
-		BS_ADD_RENDER_STAT(NumVertices, vertexCount);
-		BS_ADD_RENDER_STAT(NumPrimitives, primCount);
-	}
-
-	/**
-	 * Stores the scissor rectangle and binds it to the rasterizer stage.
-	 *
-	 * @param	left	Left edge of the rectangle.
-	 * @param	top		Top edge of the rectangle.
-	 * @param	right	Right edge of the rectangle.
-	 * @param	bottom	Bottom edge of the rectangle.
-	 */
-	void D3D11RenderAPI::setScissorRect(UINT32 left, UINT32 top, UINT32 right, UINT32 bottom)
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		mScissorRect.left = static_cast<LONG>(left);
-		mScissorRect.top = static_cast<LONG>(top);
-		mScissorRect.right = static_cast<LONG>(right);
-		mScissorRect.bottom = static_cast<LONG>(bottom);
-
-		auto context = mDevice->getImmediateContext();
-		context->RSSetScissorRects(1, &mScissorRect);
-	}
-
-	/**
-	 * Clears the area of the active render target covered by the current viewport. When the viewport is empty or
-	 * covers the whole target, the call is forwarded to clearRenderTarget(). Otherwise a clear quad is drawn.
-	 * Does nothing when no render target is active.
-	 *
-	 * @param	buffers	Combination of frame buffer type flags selecting what to clear.
-	 * @param	color	Value to clear color buffers to.
-	 * @param	depth	Value to clear the depth buffer to.
-	 * @param	stencil	Value to clear the stencil buffer to.
-	 */
-	void D3D11RenderAPI::clearViewport(UINT32 buffers, const Color& color, float depth, UINT16 stencil)
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		if(mActiveRenderTarget == nullptr)
-			return;
-
-		const RenderTargetProperties& rtProps = mActiveRenderTarget->getProperties();
-
-		Rect2I clearArea(static_cast<int>(mViewport.TopLeftX), static_cast<int>(mViewport.TopLeftY),
-			static_cast<int>(mViewport.Width), static_cast<int>(mViewport.Height));
-
-		const bool emptyArea = clearArea.width == 0 || clearArea.height == 0;
-		const bool coversTarget = (clearArea.x == 0 && clearArea.y == 0 && clearArea.width == rtProps.getWidth() && clearArea.height == rtProps.getHeight());
-
-		if (emptyArea || coversTarget)
-		{
-			// clearRenderTarget() increments the clear statistic itself
-			clearRenderTarget(buffers, color, depth, stencil);
-			return;
-		}
-
-		D3D11RenderUtility::instance().drawClearQuad(buffers, color, depth, stencil);
-		BS_INC_RENDER_STAT(NumClears);
-	}
-
-	/**
-	 * Clears the selected buffers of the active render target in their entirety. Does nothing when no render
-	 * target is active.
-	 *
-	 * @param	buffers	Combination of frame buffer type flags selecting what to clear.
-	 * @param	color	Value to clear color buffers to.
-	 * @param	depth	Value to clear the depth buffer to.
-	 * @param	stencil	Value to clear the stencil buffer to. Truncated to 8 bits.
-	 */
-	void D3D11RenderAPI::clearRenderTarget(UINT32 buffers, const Color& color, float depth, UINT16 stencil)
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		if(mActiveRenderTarget == nullptr)
-			return;
-
-		auto context = mDevice->getImmediateContext();
-
-		// Color surfaces
-		if (buffers & FBT_COLOR)
-		{
-			const UINT32 numViews = mCurrentCapabilities->getNumMultiRenderTargets();
-
-			ID3D11RenderTargetView** rtViews = bs_newN<ID3D11RenderTargetView*>(numViews);
-			memset(rtViews, 0, sizeof(ID3D11RenderTargetView*) * numViews);
-
-			mActiveRenderTarget->getCustomAttribute("RTV", rtViews);
-
-			// Without a first view the whole call ends here: depth/stencil is not cleared and no clear is counted
-			if (rtViews[0] == nullptr)
-			{
-				bs_deleteN(rtViews, numViews);
-				return;
-			}
-
-			float clearColor[4] = { color.r, color.g, color.b, color.a };
-
-			for(UINT32 viewIdx = 0; viewIdx < numViews; viewIdx++)
-			{
-				if(rtViews[viewIdx] == nullptr)
-					continue;
-
-				context->ClearRenderTargetView(rtViews[viewIdx], clearColor);
-			}
-
-			bs_deleteN(rtViews, numViews);
-		}
-
-		// Depth and stencil
-		const bool clearDepth = (buffers & FBT_DEPTH) != 0;
-		const bool clearStencil = (buffers & FBT_STENCIL) != 0;
-
-		if(clearDepth || clearStencil)
-		{
-			ID3D11DepthStencilView* depthStencilView = nullptr;
-			mActiveRenderTarget->getCustomAttribute("DSV", &depthStencilView);
-
-			UINT32 flagBits = 0;
-			if(clearDepth)
-				flagBits |= D3D11_CLEAR_DEPTH;
-
-			if(clearStencil)
-				flagBits |= D3D11_CLEAR_STENCIL;
-
-			D3D11_CLEAR_FLAG clearFlag = (D3D11_CLEAR_FLAG)flagBits;
-
-			if(depthStencilView != nullptr)
-				context->ClearDepthStencilView(depthStencilView, clearFlag, depth, (UINT8)stencil);
-		}
-
-		BS_INC_RENDER_STAT(NumClears);
-	}
-
-	/**
-	 * Makes the provided render target active, binds its render target views and depth-stencil view to the
-	 * output merger and re-applies the viewport. A null target unbinds all views.
-	 *
-	 * Raises RenderingAPIException if the device reports an error after binding.
-	 *
-	 * @param	target	Render target to activate, may be null.
-	 */
-	void D3D11RenderAPI::setRenderTarget(const SPtr<RenderTargetCore>& target)
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		mActiveRenderTarget = target;
-
-		// Views start out null, so unused slots (and every slot when there is no target) get unbound
-		UINT32 maxRenderTargets = mCurrentCapabilities->getNumMultiRenderTargets();
-		ID3D11RenderTargetView** views = bs_newN<ID3D11RenderTargetView*>(maxRenderTargets);
-		memset(views, 0, sizeof(ID3D11RenderTargetView*) * maxRenderTargets);
-
-		ID3D11DepthStencilView* depthStencilView = nullptr;
-
-		if (target != nullptr)
-		{
-			target->getCustomAttribute("RTV", views);
-			target->getCustomAttribute("DSV", &depthStencilView);
-		}
-
-		// Bind render targets
-		mDevice->getImmediateContext()->OMSetRenderTargets(maxRenderTargets, views, depthStencilView);
-
-		// The array is not used past this point. It is released before the error check so it isn't leaked when
-		// the check below raises.
-		bs_deleteN(views, maxRenderTargets);
-
-		if (mDevice->hasError())
-			BS_EXCEPT(RenderingAPIException, "Failed to setRenderTarget : " + mDevice->getErrorDescription());
-
-		applyViewport();
-
-		BS_INC_RENDER_STAT(NumRenderTargetChanges);
-	}
-
-	/**
-	 * Not supported by this render API. Only logs a warning.
-	 *
-	 * @param	clipPlanes	Ignored.
-	 */
-	void D3D11RenderAPI::setClipPlanesImpl(const PlaneList& clipPlanes)
-	{
-		LOGWRN("This call will be ignored. DX11 uses shaders for setting clip planes.");
-	}
-
-	/**
-	 * Builds the capabilities description of the active D3D11 driver for the current feature level.
-	 *
-	 * @return	Capabilities object allocated with bs_new and handed to the caller.
-	 *
-	 * @note	Core thread only.
-	 */
-	RenderAPICapabilities* D3D11RenderAPI::createRenderSystemCapabilities() const
-	{
-		THROW_IF_NOT_CORE_THREAD;
-
-		RenderAPICapabilities* rsc = bs_new<RenderAPICapabilities>();
-
-		rsc->setDriverVersion(mDriverVersion);
-		rsc->setDeviceName(mActiveD3DDriver->getDriverDescription());
-		rsc->setRenderAPIName(getName());
-
-		rsc->setStencilBufferBitDepth(8);
-
-		rsc->setCapability(RSC_ANISOTROPY);
-		rsc->setCapability(RSC_AUTOMIPMAP);
-
-		// Cube map
-		rsc->setCapability(RSC_CUBEMAPPING);
-
-		// We always support compression, D3DX will decompress if device does not support
-		rsc->setCapability(RSC_TEXTURE_COMPRESSION);
-		rsc->setCapability(RSC_TEXTURE_COMPRESSION_DXT);
-		rsc->setCapability(RSC_TWO_SIDED_STENCIL);
-		rsc->setCapability(RSC_STENCIL_WRAP);
-		rsc->setCapability(RSC_HWOCCLUSION);
-		rsc->setCapability(RSC_HWOCCLUSION_ASYNCHRONOUS);
-
-		if(mFeatureLevel >= D3D_FEATURE_LEVEL_10_1)
-			rsc->setMaxBoundVertexBuffers(32);
-		else
-			rsc->setMaxBoundVertexBuffers(16);
-
-		if(mFeatureLevel >= D3D_FEATURE_LEVEL_10_0)
-		{
-			rsc->addShaderProfile("ps_4_0");
-			rsc->addShaderProfile("vs_4_0");
-			rsc->addShaderProfile("gs_4_0");
-
-			rsc->addGpuProgramProfile(GPP_FS_4_0, "ps_4_0");
-			rsc->addGpuProgramProfile(GPP_VS_4_0, "vs_4_0");
-			rsc->addGpuProgramProfile(GPP_GS_4_0, "gs_4_0");
-
-			rsc->setNumTextureUnits(GPT_FRAGMENT_PROGRAM, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT);
-			rsc->setNumTextureUnits(GPT_VERTEX_PROGRAM, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT);
-			rsc->setNumTextureUnits(GPT_GEOMETRY_PROGRAM, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT);
-
-			// Combined count is the sum over every stage available at this feature level
-			// (fragment + vertex + geometry).
-			rsc->setNumCombinedTextureUnits(rsc->getNumTextureUnits(GPT_FRAGMENT_PROGRAM)
-				+ rsc->getNumTextureUnits(GPT_VERTEX_PROGRAM) + rsc->getNumTextureUnits(GPT_GEOMETRY_PROGRAM));
-
-			rsc->setNumGpuParamBlockBuffers(GPT_FRAGMENT_PROGRAM, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT);
-			rsc->setNumGpuParamBlockBuffers(GPT_VERTEX_PROGRAM, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT);
-			rsc->setNumGpuParamBlockBuffers(GPT_GEOMETRY_PROGRAM, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT);
-
-			rsc->setNumCombinedGpuParamBlockBuffers(rsc->getNumGpuParamBlockBuffers(GPT_FRAGMENT_PROGRAM)
-				+ rsc->getNumGpuParamBlockBuffers(GPT_VERTEX_PROGRAM) + rsc->getNumGpuParamBlockBuffers(GPT_GEOMETRY_PROGRAM));
-		}
-
-		if(mFeatureLevel >= D3D_FEATURE_LEVEL_10_1)
-		{
-			rsc->addShaderProfile("ps_4_1");
-			rsc->addShaderProfile("vs_4_1");
-			rsc->addShaderProfile("gs_4_1");
-
-			rsc->addGpuProgramProfile(GPP_FS_4_1, "ps_4_1");
-			rsc->addGpuProgramProfile(GPP_VS_4_1, "vs_4_1");
-			rsc->addGpuProgramProfile(GPP_GS_4_1, "gs_4_1");
-		}
-
-		if(mFeatureLevel >= D3D_FEATURE_LEVEL_11_0)
-		{
-			rsc->addShaderProfile("ps_5_0");
-			rsc->addShaderProfile("vs_5_0");
-			rsc->addShaderProfile("gs_5_0");
-			rsc->addShaderProfile("cs_5_0");
-			rsc->addShaderProfile("hs_5_0");
-			rsc->addShaderProfile("ds_5_0");
-
-			rsc->addGpuProgramProfile(GPP_FS_5_0, "ps_5_0");
-			rsc->addGpuProgramProfile(GPP_VS_5_0, "vs_5_0");
-			rsc->addGpuProgramProfile(GPP_GS_5_0, "gs_5_0");
-			rsc->addGpuProgramProfile(GPP_CS_5_0, "cs_5_0");
-			rsc->addGpuProgramProfile(GPP_HS_5_0, "hs_5_0");
-			rsc->addGpuProgramProfile(GPP_DS_5_0, "ds_5_0");
-
-			rsc->setNumTextureUnits(GPT_HULL_PROGRAM, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT);
-			rsc->setNumTextureUnits(GPT_DOMAIN_PROGRAM, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT);
-			rsc->setNumTextureUnits(GPT_COMPUTE_PROGRAM, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT);
-
-			// Recompute the combined counts now that hull, domain and compute stages are available too.
-			rsc->setNumCombinedTextureUnits(rsc->getNumTextureUnits(GPT_FRAGMENT_PROGRAM)
-				+ rsc->getNumTextureUnits(GPT_VERTEX_PROGRAM) + rsc->getNumTextureUnits(GPT_GEOMETRY_PROGRAM)
-				+ rsc->getNumTextureUnits(GPT_HULL_PROGRAM) + rsc->getNumTextureUnits(GPT_DOMAIN_PROGRAM)
-				+ rsc->getNumTextureUnits(GPT_COMPUTE_PROGRAM));
-
-			rsc->setNumGpuParamBlockBuffers(GPT_HULL_PROGRAM, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT);
-			rsc->setNumGpuParamBlockBuffers(GPT_DOMAIN_PROGRAM, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT);
-			rsc->setNumGpuParamBlockBuffers(GPT_COMPUTE_PROGRAM, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT);
-
-			rsc->setNumCombinedGpuParamBlockBuffers(rsc->getNumGpuParamBlockBuffers(GPT_FRAGMENT_PROGRAM)
-				+ rsc->getNumGpuParamBlockBuffers(GPT_VERTEX_PROGRAM) + rsc->getNumGpuParamBlockBuffers(GPT_GEOMETRY_PROGRAM)
-				+ rsc->getNumGpuParamBlockBuffers(GPT_HULL_PROGRAM) + rsc->getNumGpuParamBlockBuffers(GPT_DOMAIN_PROGRAM)
-				+ rsc->getNumGpuParamBlockBuffers(GPT_COMPUTE_PROGRAM));
-
-			rsc->setCapability(RSC_SHADER_SUBROUTINE);
-		}
-
-		rsc->setCapability(RSC_USER_CLIP_PLANES);
-		rsc->setCapability(RSC_VERTEX_FORMAT_UBYTE4);
-
-		// Adapter details
-		const DXGI_ADAPTER_DESC& adapterID = mActiveD3DDriver->getAdapterIdentifier();
-
-		// Determine vendor from the PCI vendor id reported by the adapter
-		switch(adapterID.VendorId)
-		{
-		case 0x10DE:
-			rsc->setVendor(GPU_NVIDIA);
-			break;
-		case 0x1002:
-			rsc->setVendor(GPU_AMD);
-			break;
-		case 0x163C:
-		case 0x8086:
-			rsc->setVendor(GPU_INTEL);
-			break;
-		default:
-			rsc->setVendor(GPU_UNKNOWN);
-			break;
-		};
-
-		rsc->setCapability(RSC_INFINITE_FAR_PLANE);
-
-		rsc->setCapability(RSC_TEXTURE_3D);
-		rsc->setCapability(RSC_NON_POWER_OF_2_TEXTURES);
-		rsc->setCapability(RSC_HWRENDER_TO_TEXTURE);
-		rsc->setCapability(RSC_TEXTURE_FLOAT);
-
-		rsc->setNumMultiRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT);
-		rsc->setCapability(RSC_MRT_DIFFERENT_BIT_DEPTHS);
-
-		rsc->setCapability(RSC_POINT_SPRITES);
-		rsc->setCapability(RSC_POINT_EXTENDED_PARAMETERS);
-		rsc->setMaxPointSize(256);
-
-		rsc->setCapability(RSC_VERTEX_TEXTURE_FETCH);
-
-		rsc->setCapability(RSC_MIPMAP_LOD_BIAS);
-
-		rsc->setCapability(RSC_PERSTAGECONSTANT);
-
-		return rsc;
-	}
-
-	/**
-	 * Finds multisample settings the device supports for the given format, starting at the requested
-	 * sample count and downgrading until a supported combination is found.
-	 *
-	 * @param	multisampleCount	Requested number of samples (0 is treated as a single sample).
-	 * @param	format				Format the quality levels are queried for.
-	 * @param	outputSampleDesc	Receives the chosen sample count and quality. If nothing is supported it is
-	 *								set to a single sample with quality 0 (no multisampling).
-	 */
-	void D3D11RenderAPI::determineMultisampleSettings(UINT32 multisampleCount, DXGI_FORMAT format, DXGI_SAMPLE_DESC* outputSampleDesc)
-	{
-		bool tryCSAA = false; // Note: Disabled for now, but leaving the code for later so it might be useful
-		enum CSAAMode { CSAA_Normal, CSAA_Quality };
-		CSAAMode csaaMode = CSAA_Normal;
-
-		const UINT32 origNumSamples = multisampleCount;
-		bool foundValid = false;
-		while (!foundValid)
-		{
-			// Deal with special cases
-			if (tryCSAA)
-			{
-				switch(multisampleCount)
-				{
-				case 8:
-					if (csaaMode == CSAA_Quality)
-					{
-						outputSampleDesc->Count = 8;
-						outputSampleDesc->Quality = 8;
-					}
-					else
-					{
-						outputSampleDesc->Count = 4;
-						outputSampleDesc->Quality = 8;
-					}
-					break;
-				case 16:
-					if (csaaMode == CSAA_Quality)
-					{
-						outputSampleDesc->Count = 8;
-						outputSampleDesc->Quality = 16;
-					}
-					else
-					{
-						outputSampleDesc->Count = 4;
-						outputSampleDesc->Quality = 16;
-					}
-					break;
-				}
-			}
-			else // !CSAA
-			{
-				outputSampleDesc->Count = multisampleCount == 0 ? 1 : multisampleCount;
-				outputSampleDesc->Quality = D3D11_STANDARD_MULTISAMPLE_PATTERN;
-			}
-
-			UINT outQuality = 0;
-			HRESULT hr = mDevice->getD3D11Device()->CheckMultisampleQualityLevels(format, outputSampleDesc->Count, &outQuality);
-
-			// The call succeeds even for unsupported combinations, in which case it reports zero quality levels,
-			// so a successful HRESULT alone is not enough.
-			bool supported = SUCCEEDED(hr) && outQuality > 0;
-			if (supported && tryCSAA)
-				supported = outQuality > outputSampleDesc->Quality;
-
-			if (supported)
-			{
-				foundValid = true;
-			}
-			else
-			{
-				// Downgrade
-				if (tryCSAA && multisampleCount == 8)
-				{
-					// For CSAA, we'll try downgrading with quality mode at all samples.
-					// then try without quality, then drop CSAA
-					if (csaaMode == CSAA_Quality)
-					{
-						// Drop quality first
-						csaaMode = CSAA_Normal;
-					}
-					else
-					{
-						// Drop CSAA entirely 
-						tryCSAA = false;
-					}
-
-					// Return to original requested samples
-					multisampleCount = origNumSamples;
-				}
-				else if (multisampleCount <= 2)
-				{
-					// Ran out of options, no multisampling. Write the fallback explicitly so the caller never
-					// receives the last rejected combination, and never decrement past zero.
-					outputSampleDesc->Count = 1;
-					outputSampleDesc->Quality = 0;
-					foundValid = true;
-				}
-				else
-				{
-					// Drop samples
-					multisampleCount--;
-				}
-			}
-		} 
-	}
-
-	/** Returns the vertex element type this render API reports for vertex colors (VET_COLOR_ABGR). */
-	VertexElementType D3D11RenderAPI::getColorVertexElementType() const
-	{
-		const VertexElementType colorElementType = VET_COLOR_ABGR;
-		return colorElementType;
-	}
-
-	/**
-	 * Copies the projection matrix into @p dest and remaps its depth range from [-1,+1] to [0,1].
-	 *
-	 * @param	matrix	Source projection matrix.
-	 * @param	dest	Receives the converted matrix.
-	 */
-	void D3D11RenderAPI::convertProjectionMatrix(const Matrix4& matrix, Matrix4& dest)
-	{
-		dest = matrix;
-
-		// Row 2 becomes the average of rows 2 and 3; row 3 itself is left untouched.
-		for (UINT32 col = 0; col < 4; col++)
-			dest[2][col] = (dest[2][col] + dest[3][col]) / 2;
-	}
-
-	/** Returns the horizontal texel offset reported by this render API, which is always zero. */
-	float D3D11RenderAPI::getHorizontalTexelOffset()
-	{
-		const float horizontalOffset = 0.0f;
-		return horizontalOffset;
-	}
-
-	/** Returns the vertical texel offset reported by this render API, which is always zero. */
-	float D3D11RenderAPI::getVerticalTexelOffset()
-	{
-		const float verticalOffset = 0.0f;
-		return verticalOffset;
-	}
-
-	/** Returns the smallest depth input value reported by this render API (0). */
-	float D3D11RenderAPI::getMinimumDepthInputValue()
-	{
-		const float minDepth = 0.0f;
-		return minDepth;
-	}
-
-	/** Returns the largest depth input value reported by this render API (1). */
-	float D3D11RenderAPI::getMaximumDepthInputValue()
-	{
-		const float maxDepth = 1.0f;
-		return maxDepth;
-	}
-
-	/**
-	 * Lays out the provided parameters into a single shareable parameter block and fills in the
-	 * size/offset fields of every parameter.
-	 *
-	 * @param	name	Name assigned to the generated block.
-	 * @param	params	Parameters to place in the block. Their elementSize, arrayElementStride, cpuMemOffset,
-	 *					gpuMemOffset and paramBlockSlot fields are overwritten.
-	 * @return			Descriptor of the generated block (slot 0).
-	 *
-	 * @note	Sizes are computed from typeInfo.size / 4. NOTE(review): presumably 4-byte units, so a
-	 *			boundary of 4 units matches the "16 byte" wording below - confirm.
-	 */
-	GpuParamBlockDesc D3D11RenderAPI::generateParamBlockDesc(const String& name, Vector<GpuParamDataDesc>& params)
-	{
-		// Rounds 'value' up to the next multiple of 'alignment' (no change if already aligned)
-		auto padTo = [](UINT32 value, UINT32 alignment)
-		{
-			const UINT32 remainder = value % alignment;
-			return remainder != 0 ? value + (alignment - remainder) : value;
-		};
-
-		GpuParamBlockDesc block;
-		block.name = name;
-		block.slot = 0;
-		block.isShareable = true;
-		block.blockSize = 0;
-
-		for (auto& param : params)
-		{
-			const GpuParamDataTypeInfo& typeInfo = GpuParams::PARAM_SIZES.lookup[param.type];
-			UINT32 elemSize = typeInfo.size / 4;
-			UINT32 numElements = 1;
-
-			if (param.arraySize > 1)
-			{
-				// Arrays perform no packing and their elements are always padded and aligned to four component vectors
-				elemSize = padTo(elemSize, typeInfo.baseTypeSize);
-				block.blockSize = padTo(block.blockSize, typeInfo.baseTypeSize);
-
-				numElements = param.arraySize;
-			}
-			else
-			{
-				// Pack everything as tightly as possible as long as the data doesn't cross 16 byte boundary
-				const UINT32 usedInRow = block.blockSize % 4;
-				const bool crossesBoundary = usedInRow != 0 && elemSize > (4 - usedInRow);
-				if (crossesBoundary)
-					block.blockSize += (4 - usedInRow);
-			}
-
-			param.elementSize = elemSize;
-			param.arrayElementStride = elemSize;
-			param.cpuMemOffset = block.blockSize;
-			param.gpuMemOffset = 0;
-			param.paramBlockSlot = 0;
-
-			block.blockSize += elemSize * numElements;
-		}
-
-		// Constant buffer size must always be a multiple of 16
-		block.blockSize = padTo(block.blockSize, 4);
-
-		return block;
-	}
-
-	/************************************************************************/
-	/* 								PRIVATE		                     		*/
-	/************************************************************************/
-
-	/**
-	 * Retrieves the input layout matching the active vertex shader and vertex declaration and binds it
-	 * to the immediate context. Logs a warning and does nothing if either of them is not set.
-	 */
-	void D3D11RenderAPI::applyInputLayout()
-	{
-		const bool hasDeclaration = mActiveVertexDeclaration != nullptr;
-		if(!hasDeclaration)
-		{
-			LOGWRN("Cannot apply input layout without a vertex declaration. Set vertex declaration before calling this method.");
-			return;
-		}
-
-		const bool hasVertexShader = mActiveVertexShader != nullptr;
-		if(!hasVertexShader)
-		{
-			LOGWRN("Cannot apply input layout without a vertex shader. Set vertex shader before calling this method.");
-			return;
-		}
-
-		ID3D11InputLayout* inputLayout = mIAManager->retrieveInputLayout(
-			mActiveVertexShader->getInputDeclaration(), mActiveVertexDeclaration, *mActiveVertexShader);
-
-		mDevice->getImmediateContext()->IASetInputLayout(inputLayout);
-	}
+#include "BsD3D11RenderAPI.h"
+#include "BsD3D11DriverList.h"
+#include "BsD3D11Driver.h"
+#include "BsD3D11Device.h"
+#include "BsD3D11TextureManager.h"
+#include "BsD3D11Texture.h"
+#include "BsD3D11HardwareBufferManager.h"
+#include "BsD3D11RenderWindowManager.h"
+#include "BsD3D11HLSLProgramFactory.h"
+#include "BsD3D11BlendState.h"
+#include "BsD3D11RasterizerState.h"
+#include "BsD3D11DepthStencilState.h"
+#include "BsD3D11SamplerState.h"
+#include "BsD3D11GpuProgram.h"
+#include "BsD3D11Mappings.h"
+#include "BsD3D11VertexBuffer.h"
+#include "BsD3D11IndexBuffer.h"
+#include "BsD3D11RenderStateManager.h"
+#include "BsD3D11GpuParamBlockBuffer.h"
+#include "BsD3D11InputLayoutManager.h"
+#include "BsD3D11TextureView.h"
+#include "BsD3D11RenderUtility.h"
+#include "BsGpuParams.h"
+#include "BsCoreThread.h"
+#include "BsD3D11QueryManager.h"
+#include "BsDebug.h"
+#include "BsException.h"
+#include "BsRenderStats.h"
+#include "BsGpuParamDesc.h"
+
+namespace BansheeEngine
+{
+	/** Initializes all members to their empty defaults; no D3D11 objects are created here. */
+	D3D11RenderAPI::D3D11RenderAPI()
+		: mDXGIFactory(nullptr)
+		, mDevice(nullptr)
+		, mDriverList(nullptr)
+		, mActiveD3DDriver(nullptr)
+		, mFeatureLevel(D3D_FEATURE_LEVEL_11_0)
+		, mHLSLFactory(nullptr)
+		, mIAManager(nullptr)
+		, mStencilRef(0)
+		, mActiveDrawOp(DOT_TRIANGLE_LIST)
+		, mViewportNorm(0.0f, 0.0f, 1.0f, 1.0f)
+	{
+		// DX11 handles clip planes through shaders
+		mClipPlanesDirty = false;
+	}
+
+	/** Intentionally empty; this destructor releases nothing itself. */
+	D3D11RenderAPI::~D3D11RenderAPI()
+	{ }
+
+	/** Returns the identifier of this render API ("D3D11RenderAPI"). */
+	const StringID& D3D11RenderAPI::getName() const
+	{
+		// Function-local static so the same object can be returned by reference on every call
+		static const StringID apiName("D3D11RenderAPI");
+		return apiName;
+	}
+
+	/** Returns the name of the shading language used by this render API ("hlsl"). */
+	const String& D3D11RenderAPI::getShadingLanguageName() const
+	{
+		// Function-local static so the same object can be returned by reference on every call
+		static const String languageName("hlsl");
+		return languageName;
+	}
+
+	/**
+	 * Creates the DXGI factory, the driver list and the D3D11 device, then starts up the managers
+	 * and factories that depend on them.
+	 *
+	 * @note	Core thread only. Throws RenderingAPIException if the DXGI factory or the device cannot
+	 *			be created.
+	 */
+	void D3D11RenderAPI::initializePrepare()
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		HRESULT hr = CreateDXGIFactory(__uuidof(IDXGIFactory), (void**)&mDXGIFactory);
+		if(FAILED(hr))
+			BS_EXCEPT(RenderingAPIException, "Failed to create Direct3D11 DXGIFactory");
+
+		mDriverList = bs_new<D3D11DriverList>(mDXGIFactory);
+		mActiveD3DDriver = mDriverList->item(0); // TODO: Always get first driver, for now
+		mVideoModeInfo = mActiveD3DDriver->getVideoModeInfo();
+
+		IDXGIAdapter* selectedAdapter = mActiveD3DDriver->getDeviceAdapter();
+
+		// Feature levels in order of preference; the level actually obtained is written to mFeatureLevel
+		D3D_FEATURE_LEVEL requestedLevels[] = {
+			D3D_FEATURE_LEVEL_11_0,
+			D3D_FEATURE_LEVEL_10_1,
+			D3D_FEATURE_LEVEL_10_0,
+			D3D_FEATURE_LEVEL_9_3,
+			D3D_FEATURE_LEVEL_9_2,
+			D3D_FEATURE_LEVEL_9_1
+		};
+
+		UINT32 numRequestedLevel = sizeof(requestedLevels) / sizeof(requestedLevels[0]);
+
+		UINT32 deviceFlags = 0;
+
+#if BS_DEBUG_MODE
+		deviceFlags |= D3D11_CREATE_DEVICE_DEBUG;
+#endif
+
+		// Initialized so the pointer never holds an indeterminate value if device creation fails
+		ID3D11Device* device = nullptr;
+		hr = D3D11CreateDevice(selectedAdapter, D3D_DRIVER_TYPE_UNKNOWN, nullptr, deviceFlags, 
+			requestedLevels, numRequestedLevel, D3D11_SDK_VERSION, &device, &mFeatureLevel, nullptr);
+
+		if(FAILED(hr))         
+			BS_EXCEPT(RenderingAPIException, "Failed to create Direct3D11 object. D3D11CreateDevice returned this error code: " + toString(hr));
+
+		mDevice = bs_new<D3D11Device>(device);
+		
+		// This must query for DirectX 10 interface as this is unsupported for DX11
+		LARGE_INTEGER driverVersion; 
+		if(SUCCEEDED(selectedAdapter->CheckInterfaceSupport(IID_ID3D10Device, &driverVersion)))
+		{
+			mDriverVersion.major =  HIWORD(driverVersion.HighPart);
+			mDriverVersion.minor = LOWORD(driverVersion.HighPart);
+			mDriverVersion.release = HIWORD(driverVersion.LowPart);
+			mDriverVersion.build = LOWORD(driverVersion.LowPart);
+		}
+
+		// Create the texture manager for use by others		
+		TextureManager::startUp<D3D11TextureManager>();
+		TextureCoreManager::startUp<D3D11TextureCoreManager>();
+
+		// Create hardware buffer manager		
+		HardwareBufferManager::startUp();
+		HardwareBufferCoreManager::startUp<D3D11HardwareBufferCoreManager>(std::ref(*mDevice));
+
+		// Create render window manager
+		RenderWindowManager::startUp<D3D11RenderWindowManager>(this);
+		RenderWindowCoreManager::startUp<D3D11RenderWindowCoreManager>(this);
+
+		// Create & register HLSL factory		
+		mHLSLFactory = bs_new<D3D11HLSLProgramFactory>();
+
+		// Create render state manager
+		RenderStateCoreManager::startUp<D3D11RenderStateCoreManager>();
+
+		mCurrentCapabilities = createRenderSystemCapabilities();
+
+		mCurrentCapabilities->addShaderProfile("hlsl");
+		GpuProgramCoreManager::instance().addFactory(mHLSLFactory);
+
+		mIAManager = bs_new<D3D11InputLayoutManager>();
+
+		RenderAPICore::initializePrepare();
+	}
+
+	/**
+	 * Starts up the render utility and the query manager, then forwards to the base class.
+	 *
+	 * @param	primaryWindow	Window passed through to RenderAPICore::initializeFinalize.
+	 */
+	void D3D11RenderAPI::initializeFinalize(const SPtr<RenderWindowCore>& primaryWindow)
+	{
+		// Modules that are started here rather than in initializePrepare()
+		D3D11RenderUtility::startUp(mDevice);
+		QueryManager::startUp<D3D11QueryManager>();
+
+		RenderAPICore::initializeFinalize(primaryWindow);
+	}
+
+    /**
+     * Releases bound UAV views, shuts down the managers started during initialization and destroys the
+     * device, driver list and DXGI factory.
+     *
+     * @note	Core thread only.
+     */
+    void D3D11RenderAPI::destroyCore()
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		// Release any texture views still bound for load-store access
+		for (auto& uavEntry : mBoundUAVs)
+		{
+			if (uavEntry.second == nullptr)
+				continue;
+
+			uavEntry.first->releaseView(uavEntry.second);
+		}
+
+		QueryManager::shutDown();
+		D3D11RenderUtility::shutDown();
+
+		if(mIAManager != nullptr)
+		{
+			bs_delete(mIAManager);
+			mIAManager = nullptr;
+		}
+
+		if(mHLSLFactory != nullptr)
+		{
+			bs_delete(mHLSLFactory);
+			mHLSLFactory = nullptr;
+		}
+
+		// Drop references to currently active objects before their managers go away
+		mActiveVertexDeclaration = nullptr;
+		mActiveVertexShader = nullptr;
+		mActiveRenderTarget = nullptr;
+
+		// Managers are shut down in the reverse order of their start-up in initializePrepare()
+		RenderStateCoreManager::shutDown();
+		RenderWindowCoreManager::shutDown();
+		RenderWindowManager::shutDown();
+		HardwareBufferCoreManager::shutDown();
+		HardwareBufferManager::shutDown();
+		TextureCoreManager::shutDown();
+		TextureManager::shutDown();
+
+		SAFE_RELEASE(mDXGIFactory);
+
+		if(mDevice != nullptr)
+		{
+			bs_delete(mDevice);
+			mDevice = nullptr;
+		}
+
+		if(mDriverList != nullptr)
+		{
+			bs_delete(mDriverList);
+			mDriverList = nullptr;
+		}
+
+		mActiveD3DDriver = nullptr;
+
+		RenderAPICore::destroyCore();
+	}
+
+	/**
+	 * Binds a sampler state to a single sampler slot of the specified GPU program stage.
+	 *
+	 * @param	gptype			Program stage to bind to. Unsupported types raise InvalidParametersException.
+	 * @param	texUnit			Sampler slot index.
+	 * @param	samplerState	State to bind. It is dereferenced without a null check.
+	 *
+	 * @note	Core thread only.
+	 */
+	void D3D11RenderAPI::setSamplerState(GpuProgramType gptype, UINT16 texUnit, const SPtr<SamplerStateCore>& samplerState)
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		// TODO - I'm setting up views one by one, it might be more efficient to hold them in an array
+		//  and then set them all up at once before rendering? Needs testing
+
+		D3D11SamplerStateCore* d3d11State = static_cast<D3D11SamplerStateCore*>(const_cast<SamplerStateCore*>(samplerState.get()));
+		ID3D11SamplerState* samplers[1] = { d3d11State->getInternal() };
+
+		auto context = mDevice->getImmediateContext();
+		switch(gptype)
+		{
+		case GPT_VERTEX_PROGRAM:
+			context->VSSetSamplers(texUnit, 1, samplers);
+			break;
+		case GPT_FRAGMENT_PROGRAM:
+			context->PSSetSamplers(texUnit, 1, samplers);
+			break;
+		case GPT_GEOMETRY_PROGRAM:
+			context->GSSetSamplers(texUnit, 1, samplers);
+			break;
+		case GPT_DOMAIN_PROGRAM:
+			context->DSSetSamplers(texUnit, 1, samplers);
+			break;
+		case GPT_HULL_PROGRAM:
+			context->HSSetSamplers(texUnit, 1, samplers);
+			break;
+		case GPT_COMPUTE_PROGRAM:
+			context->CSSetSamplers(texUnit, 1, samplers);
+			break;
+		default:
+			BS_EXCEPT(InvalidParametersException, "Unsupported gpu program type: " + toString(gptype));
+		}
+
+		BS_INC_RENDER_STAT(NumSamplerBinds);
+	}
+
+	/**
+	 * Binds the blend state to the output-merger stage, with no blend factor and a sample mask of all ones.
+	 *
+	 * @param	blendState	State to bind. It is dereferenced without a null check.
+	 *
+	 * @note	Core thread only.
+	 */
+	void D3D11RenderAPI::setBlendState(const SPtr<BlendStateCore>& blendState)
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		BlendStateCore* rawState = const_cast<BlendStateCore*>(blendState.get());
+		D3D11BlendStateCore* d3d11State = static_cast<D3D11BlendStateCore*>(rawState);
+
+		auto context = mDevice->getImmediateContext();
+		context->OMSetBlendState(d3d11State->getInternal(), nullptr, 0xFFFFFFFF);
+
+		BS_INC_RENDER_STAT(NumBlendStateChanges);
+	}
+
+	/**
+	 * Binds the rasterizer state to the rasterizer stage.
+	 *
+	 * @param	rasterizerState	State to bind. It is dereferenced without a null check.
+	 *
+	 * @note	Core thread only.
+	 */
+	void D3D11RenderAPI::setRasterizerState(const SPtr<RasterizerStateCore>& rasterizerState)
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		RasterizerStateCore* rawState = const_cast<RasterizerStateCore*>(rasterizerState.get());
+		D3D11RasterizerStateCore* d3d11State = static_cast<D3D11RasterizerStateCore*>(rawState);
+
+		auto context = mDevice->getImmediateContext();
+		context->RSSetState(d3d11State->getInternal());
+
+		BS_INC_RENDER_STAT(NumRasterizerStateChanges);
+	}
+
+	/**
+	 * Binds the depth-stencil state together with a stencil reference value to the output-merger stage.
+	 *
+	 * @param	depthStencilState	State to bind. It is dereferenced without a null check.
+	 * @param	stencilRefValue		Stencil reference value passed along with the state.
+	 *
+	 * @note	Core thread only.
+	 */
+	void D3D11RenderAPI::setDepthStencilState(const SPtr<DepthStencilStateCore>& depthStencilState, UINT32 stencilRefValue)
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		DepthStencilStateCore* rawState = const_cast<DepthStencilStateCore*>(depthStencilState.get());
+		D3D11DepthStencilStateCore* d3d11DepthStencilState = static_cast<D3D11DepthStencilStateCore*>(rawState);
+
+		auto context = mDevice->getImmediateContext();
+		context->OMSetDepthStencilState(d3d11DepthStencilState->getInternal(), stencilRefValue);
+
+		BS_INC_RENDER_STAT(NumDepthStencilStateChanges);
+	}
+
+	/**
+	 * Binds a texture's shader resource view to one slot of the specified GPU program stage, or clears
+	 * the slot when the texture is null or @p enabled is false.
+	 *
+	 * @param	gptype	Program stage to bind to. Unsupported types raise InvalidParametersException.
+	 * @param	unit	Shader resource slot index.
+	 * @param	enabled	If false a null view is bound regardless of @p texPtr.
+	 * @param	texPtr	Texture to bind; may be null.
+	 *
+	 * @note	Core thread only.
+	 */
+	void D3D11RenderAPI::setTexture(GpuProgramType gptype, UINT16 unit, bool enabled, const SPtr<TextureCore>& texPtr)
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		// TODO - I'm setting up views one by one, it might be more efficient to hold them in an array
+		//  and then set them all up at once before rendering? Needs testing
+
+		ID3D11ShaderResourceView* views[1] = { nullptr };
+		if(enabled && texPtr != nullptr)
+			views[0] = static_cast<D3D11TextureCore*>(texPtr.get())->getSRV();
+
+		auto context = mDevice->getImmediateContext();
+		switch(gptype)
+		{
+		case GPT_VERTEX_PROGRAM:
+			context->VSSetShaderResources(unit, 1, views);
+			break;
+		case GPT_FRAGMENT_PROGRAM:
+			context->PSSetShaderResources(unit, 1, views);
+			break;
+		case GPT_GEOMETRY_PROGRAM:
+			context->GSSetShaderResources(unit, 1, views);
+			break;
+		case GPT_DOMAIN_PROGRAM:
+			context->DSSetShaderResources(unit, 1, views);
+			break;
+		case GPT_HULL_PROGRAM:
+			context->HSSetShaderResources(unit, 1, views);
+			break;
+		case GPT_COMPUTE_PROGRAM:
+			context->CSSetShaderResources(unit, 1, views);
+			break;
+		default:
+			BS_EXCEPT(InvalidParametersException, "Unsupported gpu program type: " + toString(gptype));
+		}
+
+		BS_INC_RENDER_STAT(NumTextureBinds);
+	}
+
+	/**
+	 * Binds a texture surface for random read/write access (UAV) to the specified slot, or clears the
+	 * slot when the texture is null or @p enabled is false. The view previously tracked for the slot
+	 * is released.
+	 *
+	 * @param	gptype	Only GPT_FRAGMENT_PROGRAM and GPT_COMPUTE_PROGRAM are handled; other types raise
+	 *					InvalidParametersException.
+	 * @param	unit	UAV slot index.
+	 * @param	enabled	If false a null view is bound regardless of @p texPtr.
+	 * @param	texPtr	Texture to bind; may be null.
+	 * @param	surface	Mip level and array slice range the view is requested for.
+	 *
+	 * @note	Core thread only.
+	 */
+	void D3D11RenderAPI::setLoadStoreTexture(GpuProgramType gptype, UINT16 unit, bool enabled, const SPtr<TextureCore>& texPtr,
+		const TextureSurface& surface)
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		// TODO - This hasn't been tested and might be incorrect. I might need to set UAVs together with render targets,
+		// especially considering DX11 expects number of UAVs to match number of render targets.
+
+		const bool bindView = texPtr != nullptr && enabled;
+
+		TextureViewPtr newView;
+		ID3D11UnorderedAccessView* views[1] = { nullptr };
+		if (bindView)
+		{
+			newView = TextureCore::requestView(texPtr, surface.mipLevel, 1, 
+				surface.arraySlice, surface.numArraySlices, GVU_RANDOMWRITE);
+
+			views[0] = static_cast<D3D11TextureView*>(newView.get())->getUAV();
+		}
+
+		// Release whatever view was tracked for this slot, then record the new binding (or an empty one)
+		auto& boundEntry = mBoundUAVs[unit];
+		if (boundEntry.second != nullptr)
+			boundEntry.first->releaseView(boundEntry.second);
+
+		if (bindView)
+			boundEntry = std::make_pair(texPtr, newView);
+		else
+			boundEntry = std::pair<SPtr<TextureCore>, TextureViewPtr>();
+
+		if (gptype == GPT_FRAGMENT_PROGRAM)
+		{
+			mDevice->getImmediateContext()->OMSetRenderTargetsAndUnorderedAccessViews(
+				D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL, nullptr, nullptr, unit, 1, views, nullptr);
+		}
+		else if (gptype == GPT_COMPUTE_PROGRAM)
+		{
+			mDevice->getImmediateContext()->CSSetUnorderedAccessViews(unit, 1, views, nullptr);
+		}
+		else
+			BS_EXCEPT(InvalidParametersException, "Unsupported gpu program type: " + toString(gptype));
+
+		BS_INC_RENDER_STAT(NumTextureBinds);
+	}
+
+	/**
+	 * Clears the texture bound to the given slot by calling setTexture() with a null, disabled texture.
+	 *
+	 * @param	gptype	Program stage whose slot is cleared.
+	 * @param	texUnit	Shader resource slot index.
+	 *
+	 * @note	Core thread only.
+	 */
+	void D3D11RenderAPI::disableTextureUnit(GpuProgramType gptype, UINT16 texUnit)
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		const SPtr<TextureCore> noTexture;
+		setTexture(gptype, texUnit, false, noTexture);
+	}
+
+	/** Does nothing in this implementation. */
+	void D3D11RenderAPI::beginFrame()
+	{
+		// Intentionally empty - not used
+	}
+
+	/** Does nothing in this implementation. */
+	void D3D11RenderAPI::endFrame()
+	{
+		// Intentionally empty - not used
+	}
+
+	/**
+	 * Stores the viewport rectangle and applies it to the device.
+	 *
+	 * @param	vp	Viewport rectangle; applyViewport() scales it by the active render target's size.
+	 *
+	 * @note	Core thread only.
+	 */
+	void D3D11RenderAPI::setViewport(const Rect2& vp)
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		// Remember the viewport so it can be applied against the active render target
+		mViewportNorm = vp;
+
+		applyViewport();
+	}
+
+	/**
+	 * Converts the stored viewport into pixel coordinates of the active render target and sets it on
+	 * the rasterizer stage. Does nothing if no render target is active.
+	 */
+	void D3D11RenderAPI::applyViewport()
+	{
+		if (mActiveRenderTarget == nullptr)
+			return;
+
+		const RenderTargetProperties& rtProps = mActiveRenderTarget->getProperties();
+		const auto targetWidth = rtProps.getWidth();
+		const auto targetHeight = rtProps.getHeight();
+
+		// Set viewport dimensions
+		mViewport.TopLeftX = (FLOAT)(targetWidth * mViewportNorm.x);
+		mViewport.TopLeftY = (FLOAT)(targetHeight * mViewportNorm.y);
+		mViewport.Width = (FLOAT)(targetWidth * mViewportNorm.width);
+		mViewport.Height = (FLOAT)(targetHeight * mViewportNorm.height);
+
+		// Convert "top-left" to "bottom-left"
+		if (rtProps.requiresTextureFlipping())
+			mViewport.TopLeftY = targetHeight - mViewport.Height - mViewport.TopLeftY;
+
+		mViewport.MinDepth = 0.0f;
+		mViewport.MaxDepth = 1.0f;
+
+		mDevice->getImmediateContext()->RSSetViewports(1, &mViewport);
+	}
+
+	/**
+	 * Binds a contiguous range of vertex buffers to the input assembler, each with its own vertex
+	 * size as stride and a zero offset.
+	 *
+	 * @param	index		First input slot to bind to.
+	 * @param	buffers		Array of @p numBuffers vertex buffers. Entries are dereferenced without a null check.
+	 * @param	numBuffers	Number of buffers to bind. Slots [index, index + numBuffers) must all lie below the
+	 *						maximum reported by the capabilities, otherwise InvalidParametersException is raised.
+	 *
+	 * @note	Core thread only.
+	 */
+	void D3D11RenderAPI::setVertexBuffers(UINT32 index, SPtr<VertexBufferCore>* buffers, UINT32 numBuffers)
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		UINT32 maxBoundVertexBuffers = mCurrentCapabilities->getMaxBoundVertexBuffers();
+
+		// Last slot written is (index + numBuffers - 1), so the range is valid while index + numBuffers <= max.
+		// The check is written with a subtraction so it cannot wrap around, and it also guards the
+		// fixed-size local arrays below.
+		const bool outOfRange = index >= maxBoundVertexBuffers
+			|| numBuffers > (maxBoundVertexBuffers - index)
+			|| numBuffers > MAX_BOUND_VERTEX_BUFFERS;
+
+		if(outOfRange)
+			BS_EXCEPT(InvalidParametersException, "Invalid vertex index: " + toString(index) + ". Valid range is 0 .. " + toString(maxBoundVertexBuffers - 1));
+
+		ID3D11Buffer* dx11buffers[MAX_BOUND_VERTEX_BUFFERS];
+		UINT32 strides[MAX_BOUND_VERTEX_BUFFERS];
+		UINT32 offsets[MAX_BOUND_VERTEX_BUFFERS];
+
+		for(UINT32 i = 0; i < numBuffers; i++)
+		{
+			SPtr<D3D11VertexBufferCore> vertexBuffer = std::static_pointer_cast<D3D11VertexBufferCore>(buffers[i]);
+			const VertexBufferProperties& vbProps = vertexBuffer->getProperties();
+
+			dx11buffers[i] = vertexBuffer->getD3DVertexBuffer();
+
+			strides[i] = vbProps.getVertexSize();
+			offsets[i] = 0;
+		}
+
+		mDevice->getImmediateContext()->IASetVertexBuffers(index, numBuffers, dx11buffers, strides, offsets);
+
+		BS_INC_RENDER_STAT(NumVertexBufferBinds);
+	}
+
+	/**
+	 * Binds an index buffer to the input assembler using the DXGI format matching its index type.
+	 *
+	 * @param	buffer	Index buffer to bind. It is dereferenced without a null check. Index types other
+	 *					than IT_16BIT and IT_32BIT raise InternalErrorException.
+	 *
+	 * @note	Core thread only.
+	 */
+	void D3D11RenderAPI::setIndexBuffer(const SPtr<IndexBufferCore>& buffer)
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		SPtr<D3D11IndexBufferCore> d3d11Buffer = std::static_pointer_cast<D3D11IndexBufferCore>(buffer);
+		const auto indexType = d3d11Buffer->getProperties().getType();
+
+		DXGI_FORMAT dxgiFormat = DXGI_FORMAT_R16_UINT;
+		if(indexType == IT_32BIT)
+			dxgiFormat = DXGI_FORMAT_R32_UINT;
+		else if(indexType != IT_16BIT)
+			BS_EXCEPT(InternalErrorException, "Unsupported index format: " + toString(d3d11Buffer->getProperties().getType()));
+
+		mDevice->getImmediateContext()->IASetIndexBuffer(d3d11Buffer->getD3DIndexBuffer(), dxgiFormat, 0);
+
+		BS_INC_RENDER_STAT(NumIndexBufferBinds);
+	}
+
+	/**
+	 * Stores the vertex declaration; applyInputLayout() reads it when looking up the input layout.
+	 *
+	 * @param	vertexDeclaration	Declaration to make active.
+	 *
+	 * @note	Core thread only. Nothing is sent to the device here.
+	 */
+	void D3D11RenderAPI::setVertexDeclaration(const SPtr<VertexDeclarationCore>& vertexDeclaration)
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		// Only remembered here; consumed later when the input layout is applied
+		mActiveVertexDeclaration = vertexDeclaration;
+	}
+
+	/**
+	 * Sets the primitive topology used by subsequent draw calls.
+	 *
+	 * @param	op	Draw operation type to activate.
+	 *
+	 * @note	Core thread only.
+	 */
+	void D3D11RenderAPI::setDrawOperation(DrawOperationType op)
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		mDevice->getImmediateContext()->IASetPrimitiveTopology(D3D11Mappings::getPrimitiveType(op));
+
+		// draw()/drawIndexed() derive their primitive-count statistics from mActiveDrawOp, so it must
+		// follow the topology that is actually bound instead of staying at its constructor default.
+		mActiveDrawOp = op;
+	}
+
+	/**
+	 * Binds a GPU program to the pipeline stage matching its type. A vertex program is additionally
+	 * remembered as the active vertex shader.
+	 *
+	 * @param	prg	Program to bind. It is dereferenced without a null check. Unsupported program types
+	 *				raise InvalidParametersException; a device error raises RenderingAPIException.
+	 *
+	 * @note	Core thread only.
+	 */
+	void D3D11RenderAPI::bindGpuProgram(const SPtr<GpuProgramCore>& prg)
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		auto context = mDevice->getImmediateContext();
+		GpuProgramCore* program = prg.get();
+
+		switch(prg->getProperties().getType())
+		{
+		case GPT_VERTEX_PROGRAM:
+			{
+				auto vertexProgram = static_cast<D3D11GpuVertexProgramCore*>(program);
+				context->VSSetShader(vertexProgram->getVertexShader(), nullptr, 0);
+
+				// Needed later for input layout lookup
+				mActiveVertexShader = std::static_pointer_cast<D3D11GpuProgramCore>(prg);
+				break;
+			}
+		case GPT_FRAGMENT_PROGRAM:
+			{
+				auto fragmentProgram = static_cast<D3D11GpuFragmentProgramCore*>(program);
+				context->PSSetShader(fragmentProgram->getPixelShader(), nullptr, 0);
+				break;
+			}
+		case GPT_GEOMETRY_PROGRAM:
+			{
+				auto geometryProgram = static_cast<D3D11GpuGeometryProgramCore*>(program);
+				context->GSSetShader(geometryProgram->getGeometryShader(), nullptr, 0);
+				break;
+			}
+		case GPT_DOMAIN_PROGRAM:
+			{
+				auto domainProgram = static_cast<D3D11GpuDomainProgramCore*>(program);
+				context->DSSetShader(domainProgram->getDomainShader(), nullptr, 0);
+				break;
+			}
+		case GPT_HULL_PROGRAM:
+			{
+				auto hullProgram = static_cast<D3D11GpuHullProgramCore*>(program);
+				context->HSSetShader(hullProgram->getHullShader(), nullptr, 0);
+				break;
+			}
+		case GPT_COMPUTE_PROGRAM:
+			{
+				auto computeProgram = static_cast<D3D11GpuComputeProgramCore*>(program);
+				context->CSSetShader(computeProgram->getComputeShader(), nullptr, 0);
+				break;
+			}
+		default:
+			BS_EXCEPT(InvalidParametersException, "Unsupported gpu program type: " + toString(prg->getProperties().getType()));
+		}
+
+		if (mDevice->hasError())
+			BS_EXCEPT(RenderingAPIException, "Failed to bindGpuProgram : " + mDevice->getErrorDescription());
+
+		BS_INC_RENDER_STAT(NumGpuProgramBinds);
+	}
+
+	/**
+	 * Unbinds the program from the specified pipeline stage. Unbinding the vertex stage also clears
+	 * the remembered active vertex shader.
+	 *
+	 * @param	gptype	Stage to unbind. Unsupported types raise InvalidParametersException.
+	 *
+	 * @note	Core thread only.
+	 */
+	void D3D11RenderAPI::unbindGpuProgram(GpuProgramType gptype)
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		auto context = mDevice->getImmediateContext();
+		switch(gptype)
+		{
+		case GPT_VERTEX_PROGRAM:
+			context->VSSetShader(nullptr, nullptr, 0);
+			mActiveVertexShader = nullptr;
+			break;
+		case GPT_FRAGMENT_PROGRAM:
+			context->PSSetShader(nullptr, nullptr, 0);
+			break;
+		case GPT_GEOMETRY_PROGRAM:
+			context->GSSetShader(nullptr, nullptr, 0);
+			break;
+		case GPT_DOMAIN_PROGRAM:
+			context->DSSetShader(nullptr, nullptr, 0);
+			break;
+		case GPT_HULL_PROGRAM:
+			context->HSSetShader(nullptr, nullptr, 0);
+			break;
+		case GPT_COMPUTE_PROGRAM:
+			context->CSSetShader(nullptr, nullptr, 0);
+			break;
+		default:
+			BS_EXCEPT(InvalidParametersException, "Unsupported gpu program type: " + toString(gptype));
+		}
+
+		BS_INC_RENDER_STAT(NumGpuProgramBinds);
+	}
+
+	/**
+	 * Updates the hardware buffers of the provided parameters and binds every parameter block buffer
+	 * to its slot on the specified pipeline stage. Blocks without a buffer bind a null buffer.
+	 *
+	 * @param	gptype			Stage to bind the buffers to. Types not handled by the switch bind nothing.
+	 * @param	bindableParams	Parameters whose block buffers get bound. Dereferenced without a null check.
+	 *
+	 * @note	Core thread only. Raises RenderingAPIException if the device reports an error afterwards.
+	 */
+	void D3D11RenderAPI::setConstantBuffers(GpuProgramType gptype, const SPtr<GpuParamsCore>& bindableParams)
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		bindableParams->updateHardwareBuffers();
+		const GpuParamDesc& paramDesc = bindableParams->getParamDesc();
+
+		// TODO - I assign constant buffers one by one but it might be more efficient to do them all at once?
+		for(const auto& blockEntry : paramDesc.paramBlocks)
+		{
+			const auto slot = blockEntry.second.slot;
+			SPtr<GpuParamBlockBufferCore> blockBuffer = bindableParams->getParamBlockBuffer(slot);
+
+			ID3D11Buffer* buffers[1] = { nullptr };
+			if(blockBuffer != nullptr)
+			{
+				const D3D11GpuParamBlockBufferCore* d3d11BlockBuffer = 
+					static_cast<const D3D11GpuParamBlockBufferCore*>(blockBuffer.get());
+				buffers[0] = d3d11BlockBuffer->getD3D11Buffer();
+			}
+
+			switch(gptype)
+			{
+			case GPT_VERTEX_PROGRAM:
+				mDevice->getImmediateContext()->VSSetConstantBuffers(slot, 1, buffers);
+				break;
+			case GPT_FRAGMENT_PROGRAM:
+				mDevice->getImmediateContext()->PSSetConstantBuffers(slot, 1, buffers);
+				break;
+			case GPT_GEOMETRY_PROGRAM:
+				mDevice->getImmediateContext()->GSSetConstantBuffers(slot, 1, buffers);
+				break;
+			case GPT_HULL_PROGRAM:
+				mDevice->getImmediateContext()->HSSetConstantBuffers(slot, 1, buffers);
+				break;
+			case GPT_DOMAIN_PROGRAM:
+				mDevice->getImmediateContext()->DSSetConstantBuffers(slot, 1, buffers);
+				break;
+			case GPT_COMPUTE_PROGRAM:
+				mDevice->getImmediateContext()->CSSetConstantBuffers(slot, 1, buffers);
+				break;
+			};
+
+			BS_INC_RENDER_STAT(NumGpuParamBufferBinds);
+		}
+
+		if (mDevice->hasError())
+			BS_EXCEPT(RenderingAPIException, "Failed to setConstantBuffers : " + mDevice->getErrorDescription());
+	}
+
+	/**
+	 * Applies the input layout and issues a non-indexed draw call, then updates render statistics.
+	 *
+	 * @param	vertexOffset	Index of the first vertex to draw.
+	 * @param	vertexCount		Number of vertices to draw.
+	 *
+	 * @note	Core thread only. In debug builds device errors are logged as warnings.
+	 */
+	void D3D11RenderAPI::draw(UINT32 vertexOffset, UINT32 vertexCount)
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		applyInputLayout();
+
+		auto context = mDevice->getImmediateContext();
+		context->Draw(vertexCount, vertexOffset);
+
+#if BS_DEBUG_MODE
+		if(mDevice->hasError())
+			LOGWRN(mDevice->getErrorDescription());
+#endif
+
+		const UINT32 numPrimitives = vertexCountToPrimCount(mActiveDrawOp, vertexCount);
+
+		BS_INC_RENDER_STAT(NumDrawCalls);
+		BS_ADD_RENDER_STAT(NumVertices, vertexCount);
+		BS_ADD_RENDER_STAT(NumPrimitives, numPrimitives);
+	}
+
+	/**
+	 * Applies the input layout and issues an indexed draw call, then updates render statistics.
+	 *
+	 * @param	startIndex		Index of the first index to read from the index buffer.
+	 * @param	indexCount		Number of indices to draw.
+	 * @param	vertexOffset	Value added to each index before reading a vertex.
+	 * @param	vertexCount		Number of vertices; only used for the vertex statistic.
+	 *
+	 * @note	Core thread only. In debug builds device errors are logged as warnings.
+	 */
+	void D3D11RenderAPI::drawIndexed(UINT32 startIndex, UINT32 indexCount, UINT32 vertexOffset, UINT32 vertexCount)
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		applyInputLayout();
+
+		mDevice->getImmediateContext()->DrawIndexed(indexCount, startIndex, vertexOffset);
+
+#if BS_DEBUG_MODE
+		if(mDevice->hasError())
+			LOGWRN(mDevice->getErrorDescription());
+#endif
+
+		// For an indexed draw the primitives are assembled from the indices, so the primitive count
+		// follows indexCount, not the number of unique vertices.
+		UINT32 primCount = vertexCountToPrimCount(mActiveDrawOp, indexCount);
+
+		BS_INC_RENDER_STAT(NumDrawCalls);
+		BS_ADD_RENDER_STAT(NumVertices, vertexCount);
+		BS_ADD_RENDER_STAT(NumPrimitives, primCount);
+	}
+
+	/**
+	 * Stores the scissor rectangle and sets it on the rasterizer stage.
+	 *
+	 * @param	left, top, right, bottom	Rectangle edges, copied unchanged into the D3D rectangle.
+	 *
+	 * @note	Core thread only.
+	 */
+	void D3D11RenderAPI::setScissorRect(UINT32 left, UINT32 top, UINT32 right, UINT32 bottom)
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		mScissorRect.left = static_cast<LONG>(left);
+		mScissorRect.top = static_cast<LONG>(top);
+		mScissorRect.right = static_cast<LONG>(right);
+		mScissorRect.bottom = static_cast<LONG>(bottom);
+
+		auto context = mDevice->getImmediateContext();
+		context->RSSetScissorRects(1, &mScissorRect);
+	}
+
+	/**
+	 * Clears the area covered by the current viewport. If the viewport is empty or covers the whole
+	 * render target the call is forwarded to clearRenderTarget(), otherwise a clear quad is drawn.
+	 *
+	 * @param	buffers	Combination of frame buffer type flags selecting what to clear.
+	 * @param	color	Clear color.
+	 * @param	depth	Clear depth.
+	 * @param	stencil	Clear stencil value.
+	 *
+	 * @note	Core thread only. Does nothing if no render target is active.
+	 */
+	void D3D11RenderAPI::clearViewport(UINT32 buffers, const Color& color, float depth, UINT16 stencil)
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		if(mActiveRenderTarget == nullptr)
+			return;
+
+		const RenderTargetProperties& rtProps = mActiveRenderTarget->getProperties();
+
+		Rect2I clearArea((int)mViewport.TopLeftX, (int)mViewport.TopLeftY, (int)mViewport.Width, (int)mViewport.Height);
+
+		const bool emptyArea = clearArea.width == 0 || clearArea.height == 0;
+		const bool coversTarget = (clearArea.x == 0 && clearArea.y == 0 && clearArea.width == rtProps.getWidth() && clearArea.height == rtProps.getHeight());
+
+		if (emptyArea || coversTarget)
+		{
+			clearRenderTarget(buffers, color, depth, stencil);
+			return;
+		}
+
+		// Partial clear - draw a quad restricted to the viewport
+		D3D11RenderUtility::instance().drawClearQuad(buffers, color, depth, stencil);
+		BS_INC_RENDER_STAT(NumClears);
+	}
+
+	/**
+	 * Clears the requested buffers of the currently active render target. Does nothing if no render target is active.
+	 *
+	 * @param	buffers		Combination of FBT_COLOR, FBT_DEPTH and FBT_STENCIL flags selecting what to clear.
+	 * @param	color		Value written to every bound color surface when FBT_COLOR is set.
+	 * @param	depth		Value written to the depth buffer when FBT_DEPTH is set.
+	 * @param	stencil		Value written to the stencil buffer when FBT_STENCIL is set (truncated to 8 bits).
+	 */
+	void D3D11RenderAPI::clearRenderTarget(UINT32 buffers, const Color& color, float depth, UINT16 stencil)
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		if(mActiveRenderTarget == nullptr)
+			return;
+
+		// Clear render surfaces
+		if (buffers & FBT_COLOR)
+		{
+			UINT32 maxRenderTargets = mCurrentCapabilities->getNumMultiRenderTargets();
+
+			// Temporary zero-initialized array the render target fills with its views via the "RTV" attribute
+			ID3D11RenderTargetView** views = bs_newN<ID3D11RenderTargetView*>(maxRenderTargets);
+			memset(views, 0, sizeof(ID3D11RenderTargetView*) * maxRenderTargets);
+
+			mActiveRenderTarget->getCustomAttribute("RTV", views);
+			if (!views[0])
+			{
+				// No color surface in the first slot. Note this exits the whole method, so the depth/stencil clear
+				// below is skipped and the clear statistic is not incremented.
+				bs_deleteN(views, maxRenderTargets);
+				return;
+			}
+
+			float clearColor[4];
+			clearColor[0] = color.r;
+			clearColor[1] = color.g;
+			clearColor[2] = color.b;
+			clearColor[3] = color.a;
+
+			// Slots left at null are skipped
+			for(UINT32 i = 0; i < maxRenderTargets; i++)
+			{
+				if(views[i] != nullptr)
+					mDevice->getImmediateContext()->ClearRenderTargetView(views[i], clearColor);
+			}
+
+			bs_deleteN(views, maxRenderTargets);
+		}
+
+		// Clear depth stencil
+		if((buffers & FBT_DEPTH) != 0 || (buffers & FBT_STENCIL) != 0)
+		{
+			ID3D11DepthStencilView* depthStencilView = nullptr;
+			mActiveRenderTarget->getCustomAttribute("DSV", &depthStencilView);
+
+			D3D11_CLEAR_FLAG clearFlag;
+
+			// Translate the requested buffer flags into the matching D3D11 clear flags
+			if((buffers & FBT_DEPTH) != 0 && (buffers & FBT_STENCIL) != 0)
+				clearFlag = (D3D11_CLEAR_FLAG)(D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL);
+			else if((buffers & FBT_STENCIL) != 0)
+				clearFlag = D3D11_CLEAR_STENCIL;
+			else
+				clearFlag = D3D11_CLEAR_DEPTH;
+
+			// The view stays null if the render target did not provide one, in which case nothing is cleared
+			if(depthStencilView != nullptr)
+				mDevice->getImmediateContext()->ClearDepthStencilView(depthStencilView, clearFlag, depth, (UINT8)stencil);
+		}
+
+		BS_INC_RENDER_STAT(NumClears);
+	}
+
+	/**
+	 * Makes the provided render target active and binds its color and depth-stencil views to the output merger stage.
+	 * Passing a null target unbinds all views.
+	 *
+	 * @param	target	Render target to bind, or null.
+	 */
+	void D3D11RenderAPI::setRenderTarget(const SPtr<RenderTargetCore>& target)
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		mActiveRenderTarget = target;
+
+		// Temporary zero-initialized array the render target fills with its views via the "RTV" attribute
+		UINT32 maxRenderTargets = mCurrentCapabilities->getNumMultiRenderTargets();
+		ID3D11RenderTargetView** views = bs_newN<ID3D11RenderTargetView*>(maxRenderTargets);
+		memset(views, 0, sizeof(ID3D11RenderTargetView*) * maxRenderTargets);
+
+		ID3D11DepthStencilView* depthStencilView = nullptr;
+
+		if (target != nullptr)
+		{
+			target->getCustomAttribute("RTV", views);
+			target->getCustomAttribute("DSV", &depthStencilView);
+		}
+
+		// Bind render targets
+		mDevice->getImmediateContext()->OMSetRenderTargets(maxRenderTargets, views, depthStencilView);
+
+		// The temporary array is not needed after the bind call. Release it before the error check so it doesn't
+		// leak when the exception below is thrown.
+		bs_deleteN(views, maxRenderTargets);
+
+		if (mDevice->hasError())
+			BS_EXCEPT(RenderingAPIException, "Failed to setRenderTarget : " + mDevice->getErrorDescription());
+
+		applyViewport();
+
+		BS_INC_RENDER_STAT(NumRenderTargetChanges);
+	}
+
+	/**
+	 * Not supported by this render API. The provided clip planes are ignored and only a warning is logged.
+	 */
+	void D3D11RenderAPI::setClipPlanesImpl(const PlaneList& clipPlanes)
+	{
+		LOGWRN("This call will be ignored. DX11 uses shaders for setting clip planes.");
+	}
+
+	/**
+	 * Allocates and populates a capabilities object describing the active driver, based on the device feature level.
+	 *
+	 * @return	Newly allocated capabilities object (allocated with bs_new).
+	 */
+	RenderAPICapabilities* D3D11RenderAPI::createRenderSystemCapabilities() const
+	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		RenderAPICapabilities* rsc = bs_new<RenderAPICapabilities>();
+
+		rsc->setDriverVersion(mDriverVersion);
+		rsc->setDeviceName(mActiveD3DDriver->getDriverDescription());
+		rsc->setRenderAPIName(getName());
+
+		rsc->setStencilBufferBitDepth(8);
+
+		rsc->setCapability(RSC_ANISOTROPY);
+		rsc->setCapability(RSC_AUTOMIPMAP);
+
+		// Cube map
+		rsc->setCapability(RSC_CUBEMAPPING);
+
+		// We always support compression, D3DX will decompress if device does not support
+		rsc->setCapability(RSC_TEXTURE_COMPRESSION);
+		rsc->setCapability(RSC_TEXTURE_COMPRESSION_DXT);
+		rsc->setCapability(RSC_TWO_SIDED_STENCIL);
+		rsc->setCapability(RSC_STENCIL_WRAP);
+		rsc->setCapability(RSC_HWOCCLUSION);
+		rsc->setCapability(RSC_HWOCCLUSION_ASYNCHRONOUS);
+
+		if(mFeatureLevel >= D3D_FEATURE_LEVEL_10_1)
+			rsc->setMaxBoundVertexBuffers(32);
+		else
+			rsc->setMaxBoundVertexBuffers(16);
+
+		// Shader model 4.0: vertex, geometry and fragment stages
+		if(mFeatureLevel >= D3D_FEATURE_LEVEL_10_0)
+		{
+			rsc->addShaderProfile("ps_4_0");
+			rsc->addShaderProfile("vs_4_0");
+			rsc->addShaderProfile("gs_4_0");
+
+			rsc->addGpuProgramProfile(GPP_FS_4_0, "ps_4_0");
+			rsc->addGpuProgramProfile(GPP_VS_4_0, "vs_4_0");
+			rsc->addGpuProgramProfile(GPP_GS_4_0, "gs_4_0");
+
+			rsc->setNumTextureUnits(GPT_FRAGMENT_PROGRAM, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT);
+			rsc->setNumTextureUnits(GPT_VERTEX_PROGRAM, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT);
+			rsc->setNumTextureUnits(GPT_GEOMETRY_PROGRAM, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT);
+
+			// Combined count is the sum over every stage available at this feature level
+			rsc->setNumCombinedTextureUnits(rsc->getNumTextureUnits(GPT_FRAGMENT_PROGRAM)
+				+ rsc->getNumTextureUnits(GPT_VERTEX_PROGRAM) + rsc->getNumTextureUnits(GPT_GEOMETRY_PROGRAM));
+
+			rsc->setNumGpuParamBlockBuffers(GPT_FRAGMENT_PROGRAM, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT);
+			rsc->setNumGpuParamBlockBuffers(GPT_VERTEX_PROGRAM, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT);
+			rsc->setNumGpuParamBlockBuffers(GPT_GEOMETRY_PROGRAM, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT);
+
+			rsc->setNumCombinedGpuParamBlockBuffers(rsc->getNumGpuParamBlockBuffers(GPT_FRAGMENT_PROGRAM)
+				+ rsc->getNumGpuParamBlockBuffers(GPT_VERTEX_PROGRAM) + rsc->getNumGpuParamBlockBuffers(GPT_GEOMETRY_PROGRAM));
+		}
+
+		// Shader model 4.1
+		if(mFeatureLevel >= D3D_FEATURE_LEVEL_10_1)
+		{
+			rsc->addShaderProfile("ps_4_1");
+			rsc->addShaderProfile("vs_4_1");
+			rsc->addShaderProfile("gs_4_1");
+
+			rsc->addGpuProgramProfile(GPP_FS_4_1, "ps_4_1");
+			rsc->addGpuProgramProfile(GPP_VS_4_1, "vs_4_1");
+			rsc->addGpuProgramProfile(GPP_GS_4_1, "gs_4_1");
+		}
+
+		// Shader model 5.0: adds compute, hull and domain stages
+		if(mFeatureLevel >= D3D_FEATURE_LEVEL_11_0)
+		{
+			rsc->addShaderProfile("ps_5_0");
+			rsc->addShaderProfile("vs_5_0");
+			rsc->addShaderProfile("gs_5_0");
+			rsc->addShaderProfile("cs_5_0");
+			rsc->addShaderProfile("hs_5_0");
+			rsc->addShaderProfile("ds_5_0");
+
+			rsc->addGpuProgramProfile(GPP_FS_5_0, "ps_5_0");
+			rsc->addGpuProgramProfile(GPP_VS_5_0, "vs_5_0");
+			rsc->addGpuProgramProfile(GPP_GS_5_0, "gs_5_0");
+			rsc->addGpuProgramProfile(GPP_CS_5_0, "cs_5_0");
+			rsc->addGpuProgramProfile(GPP_HS_5_0, "hs_5_0");
+			rsc->addGpuProgramProfile(GPP_DS_5_0, "ds_5_0");
+
+			rsc->setNumTextureUnits(GPT_HULL_PROGRAM, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT);
+			rsc->setNumTextureUnits(GPT_DOMAIN_PROGRAM, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT);
+			rsc->setNumTextureUnits(GPT_COMPUTE_PROGRAM, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT);
+
+			// Recompute the combined counts now that the additional stages are registered
+			rsc->setNumCombinedTextureUnits(rsc->getNumTextureUnits(GPT_FRAGMENT_PROGRAM)
+				+ rsc->getNumTextureUnits(GPT_VERTEX_PROGRAM) + rsc->getNumTextureUnits(GPT_GEOMETRY_PROGRAM)
+				+ rsc->getNumTextureUnits(GPT_HULL_PROGRAM) + rsc->getNumTextureUnits(GPT_DOMAIN_PROGRAM)
+				+ rsc->getNumTextureUnits(GPT_COMPUTE_PROGRAM));
+
+			rsc->setNumGpuParamBlockBuffers(GPT_HULL_PROGRAM, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT);
+			rsc->setNumGpuParamBlockBuffers(GPT_DOMAIN_PROGRAM, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT);
+			rsc->setNumGpuParamBlockBuffers(GPT_COMPUTE_PROGRAM, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT);
+
+			rsc->setNumCombinedGpuParamBlockBuffers(rsc->getNumGpuParamBlockBuffers(GPT_FRAGMENT_PROGRAM)
+				+ rsc->getNumGpuParamBlockBuffers(GPT_VERTEX_PROGRAM) + rsc->getNumGpuParamBlockBuffers(GPT_GEOMETRY_PROGRAM)
+				+ rsc->getNumGpuParamBlockBuffers(GPT_HULL_PROGRAM) + rsc->getNumGpuParamBlockBuffers(GPT_DOMAIN_PROGRAM)
+				+ rsc->getNumGpuParamBlockBuffers(GPT_COMPUTE_PROGRAM));
+
+			rsc->setCapability(RSC_SHADER_SUBROUTINE);
+		}
+
+		rsc->setCapability(RSC_USER_CLIP_PLANES);
+		rsc->setCapability(RSC_VERTEX_FORMAT_UBYTE4);
+
+		// Adapter details
+		const DXGI_ADAPTER_DESC& adapterID = mActiveD3DDriver->getAdapterIdentifier();
+
+		// Determine vendor
+		switch(adapterID.VendorId)
+		{
+		case 0x10DE:
+			rsc->setVendor(GPU_NVIDIA);
+			break;
+		case 0x1002:
+			rsc->setVendor(GPU_AMD);
+			break;
+		case 0x163C:
+		case 0x8086:
+			rsc->setVendor(GPU_INTEL);
+			break;
+		default:
+			rsc->setVendor(GPU_UNKNOWN);
+			break;
+		}
+
+		rsc->setCapability(RSC_INFINITE_FAR_PLANE);
+
+		rsc->setCapability(RSC_TEXTURE_3D);
+		rsc->setCapability(RSC_NON_POWER_OF_2_TEXTURES);
+		rsc->setCapability(RSC_HWRENDER_TO_TEXTURE);
+		rsc->setCapability(RSC_TEXTURE_FLOAT);
+
+		rsc->setNumMultiRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT);
+		rsc->setCapability(RSC_MRT_DIFFERENT_BIT_DEPTHS);
+
+		rsc->setCapability(RSC_POINT_SPRITES);
+		rsc->setCapability(RSC_POINT_EXTENDED_PARAMETERS);
+		rsc->setMaxPointSize(256);
+
+		rsc->setCapability(RSC_VERTEX_TEXTURE_FETCH);
+
+		rsc->setCapability(RSC_MIPMAP_LOD_BIAS);
+
+		rsc->setCapability(RSC_PERSTAGECONSTANT);
+
+		return rsc;
+	}
+
+	/**
+	 * Finds a multisample description usable with the provided format, starting at the requested sample count and
+	 * lowering it until the device accepts it. If nothing is accepted the output describes no multisampling.
+	 *
+	 * @param	multisampleCount	Requested number of samples. Zero is treated as a single sample.
+	 * @param	format				Surface format to query support for.
+	 * @param	outputSampleDesc	Receives the resulting sample count and quality. Must not be null.
+	 */
+	void D3D11RenderAPI::determineMultisampleSettings(UINT32 multisampleCount, DXGI_FORMAT format, DXGI_SAMPLE_DESC* outputSampleDesc)
+	{
+		bool tryCSAA = false; // Note: Disabled for now, but leaving the code for later so it might be useful
+		enum CSAAMode { CSAA_Normal, CSAA_Quality };
+		CSAAMode csaaMode = CSAA_Normal;
+
+		bool foundValid = false;
+		const UINT32 origNumSamples = multisampleCount;
+		while (!foundValid)
+		{
+			// Deal with special cases
+			if (tryCSAA)
+			{
+				switch(multisampleCount)
+				{
+				case 8:
+					if (csaaMode == CSAA_Quality)
+					{
+						outputSampleDesc->Count = 8;
+						outputSampleDesc->Quality = 8;
+					}
+					else
+					{
+						outputSampleDesc->Count = 4;
+						outputSampleDesc->Quality = 8;
+					}
+					break;
+				case 16:
+					if (csaaMode == CSAA_Quality)
+					{
+						outputSampleDesc->Count = 8;
+						outputSampleDesc->Quality = 16;
+					}
+					else
+					{
+						outputSampleDesc->Count = 4;
+						outputSampleDesc->Quality = 16;
+					}
+					break;
+				}
+			}
+			else // !CSAA
+			{
+				outputSampleDesc->Count = multisampleCount == 0 ? 1 : multisampleCount;
+				outputSampleDesc->Quality = D3D11_STANDARD_MULTISAMPLE_PATTERN;
+			}
+
+
+			HRESULT hr;
+			UINT outQuality;
+			hr = mDevice->getD3D11Device()->CheckMultisampleQualityLevels(format, outputSampleDesc->Count, &outQuality);
+
+			if (SUCCEEDED(hr) && (!tryCSAA || outQuality > outputSampleDesc->Quality))
+			{
+				foundValid = true;
+			}
+			else
+			{
+				// Downgrade
+				if (tryCSAA && multisampleCount == 8)
+				{
+					// For CSAA, we'll try downgrading with quality mode at all samples.
+					// then try without quality, then drop CSAA
+					if (csaaMode == CSAA_Quality)
+					{
+						// Drop quality first
+						csaaMode = CSAA_Normal;
+					}
+					else
+					{
+						// Drop CSAA entirely 
+						tryCSAA = false;
+					}
+
+					// Return to original requested samples
+					multisampleCount = origNumSamples;
+				}
+				else if (multisampleCount <= 2)
+				{
+					// Ran out of options, no multisampling. Checking before decrementing avoids unsigned wrap-around
+					// when the count is already 0 or 1, and the output is reset so it doesn't keep describing the
+					// last rejected configuration.
+					multisampleCount = 0;
+					outputSampleDesc->Count = 1;
+					outputSampleDesc->Quality = 0;
+					foundValid = true;
+				}
+				else
+				{
+					// Drop samples
+					multisampleCount--;
+				}
+			}
+		} 
+	}
+
+	/**
+	 * Returns the vertex element type this render API uses for vertex colors. Always VET_COLOR_ABGR.
+	 */
+	VertexElementType D3D11RenderAPI::getColorVertexElementType() const
+	{
+		return VET_COLOR_ABGR;
+	}
+
+	/**
+	 * Copies the provided projection matrix into @p dest and remaps its depth range from [-1,+1] to [0,1].
+	 */
+	void D3D11RenderAPI::convertProjectionMatrix(const Matrix4& matrix, Matrix4& dest)
+	{
+		dest = matrix;
+
+		// Replace every entry of the third row with the average of the third and fourth row entries, which
+		// converts depth range from [-1,+1] to [0,1]
+		for (UINT32 col = 0; col < 4; col++)
+			dest[2][col] = (dest[2][col] + dest[3][col]) / 2;
+	}
+
+	/** Returns the horizontal texel offset for this render API. Always zero. */
+	float D3D11RenderAPI::getHorizontalTexelOffset()
+	{
+		return 0.0f;
+	}
+
+	/** Returns the vertical texel offset for this render API. Always zero. */
+	float D3D11RenderAPI::getVerticalTexelOffset()
+	{
+		return 0.0f;
+	}
+
+	/** Returns the lower bound of the depth range used by this render API. Always zero. */
+	float D3D11RenderAPI::getMinimumDepthInputValue()
+	{
+		return 0.0f;
+	}
+
+	/** Returns the upper bound of the depth range used by this render API. Always one. */
+	float D3D11RenderAPI::getMaximumDepthInputValue()
+	{
+		return 1.0f;
+	}
+
+	/**
+	 * Builds a parameter block description with the provided name and lays out the provided parameters inside it.
+	 *
+	 * @param	name	Name assigned to the generated block.
+	 * @param	params	Parameters to place in the block. Their size, stride, offset and slot fields are overwritten
+	 *					with the computed layout.
+	 * @return			Shareable block description at slot 0, with its size set to the total size of the layout.
+	 */
+	GpuParamBlockDesc D3D11RenderAPI::generateParamBlockDesc(const String& name, Vector<GpuParamDataDesc>& params)
+	{
+		GpuParamBlockDesc block;
+		block.blockSize = 0;
+		block.isShareable = true;
+		block.name = name;
+		block.slot = 0;
+
+		for (auto& param : params)
+		{
+			const GpuParamDataTypeInfo& typeInfo = GpuParams::PARAM_SIZES.lookup[param.type];
+			// NOTE(review): presumably converts the type size from bytes to 4-byte units, which would make the
+			// "% 4" checks below correspond to 16-byte boundaries - confirm against GpuParamDataTypeInfo
+			UINT32 size = typeInfo.size / 4;
+
+			if (param.arraySize > 1)
+			{
+				// Arrays perform no packing and their elements are always padded and aligned to four component vectors
+				UINT32 alignOffset = size % typeInfo.baseTypeSize;
+				if (alignOffset != 0)
+				{
+					// Grow the element so its size is a multiple of baseTypeSize
+					UINT32 padding = (typeInfo.baseTypeSize - alignOffset);
+					size += padding;
+				}
+
+				// Advance the block so the array starts at a multiple of baseTypeSize
+				alignOffset = block.blockSize % typeInfo.baseTypeSize;
+				if (alignOffset != 0)
+				{
+					UINT32 padding = (typeInfo.baseTypeSize - alignOffset);
+					block.blockSize += padding;
+				}
+
+				param.elementSize = size;
+				param.arrayElementStride = size;
+				param.cpuMemOffset = block.blockSize;
+				param.gpuMemOffset = 0;
+
+				block.blockSize += size * param.arraySize;
+			}
+			else
+			{
+				// Pack everything as tightly as possible as long as the data doesn't cross 16 byte boundary
+				UINT32 alignOffset = block.blockSize % 4;
+				if (alignOffset != 0 && size > (4 - alignOffset))
+				{
+					// Parameter doesn't fit in the remainder of the current group of four, start a new one
+					UINT32 padding = (4 - alignOffset);
+					block.blockSize += padding;
+				}
+
+				param.elementSize = size;
+				param.arrayElementStride = size;
+				param.cpuMemOffset = block.blockSize;
+				param.gpuMemOffset = 0;
+
+				block.blockSize += size;
+			}
+
+			param.paramBlockSlot = 0;
+		}
+
+		// Constant buffer size must always be a multiple of 16
+		if (block.blockSize % 4 != 0)
+			block.blockSize += (4 - (block.blockSize % 4));
+
+		return block;
+	}
+
+	/************************************************************************/
+	/* 								PRIVATE		                     		*/
+	/************************************************************************/
+
+	/**
+	 * Retrieves the input layout matching the active vertex shader and vertex declaration and binds it to the input
+	 * assembler. Logs a warning and binds nothing if either of the two is missing.
+	 */
+	void D3D11RenderAPI::applyInputLayout()
+	{
+		const bool hasDeclaration = mActiveVertexDeclaration != nullptr;
+		if(!hasDeclaration)
+		{
+			LOGWRN("Cannot apply input layout without a vertex declaration. Set vertex declaration before calling this method.");
+			return;
+		}
+
+		const bool hasVertexShader = mActiveVertexShader != nullptr;
+		if(!hasVertexShader)
+		{
+			LOGWRN("Cannot apply input layout without a vertex shader. Set vertex shader before calling this method.");
+			return;
+		}
+
+		// Layout depends on both the shader's expected inputs and the layout of the bound vertex data
+		ID3D11InputLayout* inputLayout = mIAManager->retrieveInputLayout(
+			mActiveVertexShader->getInputDeclaration(), mActiveVertexDeclaration, *mActiveVertexShader);
+
+		mDevice->getImmediateContext()->IASetInputLayout(inputLayout);
+	}
 }

+ 369 - 368
BansheeD3D9RenderAPI/Include/BsD3D9HLSLParamParser.h

@@ -1,369 +1,370 @@
-#pragma once
-
-#include "BsD3D9Prerequisites.h"
-#include "BsD3D9GpuProgram.h"
-
-namespace BansheeEngine
-{
-	/**
-	 * @brief	Helper class that parses GPU program constant table and returns parameters used 
-	 *			by the program, as well as their type, size and other information.
-	 */
-	class D3D9HLSLParamParser
-	{
-	public:
-		/**
-		 * @brief	Initializes the parameter parser with the specified constant table, and an optional list
-		 *			of parameter blocks. DirectX 9 does not support parameter blocks internally, but
-		 *			we can emulate the functionality by providing a list of user-defined blocks and
-		 *			the parameters they contain.
-		 */
-		D3D9HLSLParamParser(LPD3DXCONSTANTTABLE constTable, const Vector<D3D9EmulatedParamBlock>& blocks)
-			:mpConstTable(constTable), mBlocks(blocks)
-		{ }
-
-		/**
-		 * @brief	Builds parameter descriptions and returns an object containing all relevant information.
-		 */
-		GpuParamDescPtr buildParameterDescriptions();
-
-	private:
-		/**
-		 * @brief	Determines information about the specified parameter and places it in the provided 
-		 *			parameter block, as well as any children of the parameter.
-		 *
-		 * @param	blockDesc	Parent block into which to add the new parameter.
-		 * @param	paramName	Name of the parameter.
-		 * @param	constant	Parameter handle in the constant table.
-		 * @param	prefix		Prefix to append to the parameter and any child parameters.	
-		 */
-		void processParameter(GpuParamBlockDesc& blockDesc, const String& paramName, D3DXHANDLE constant, String prefix);
-
-		/**
-		 * @brief	Populates information about the parameter in "memberDesc" from the data in d3dDesc. Esentially converts
-		 *			DX9 parameter data to engine data.
-		 */
-		void populateParamMemberDesc(GpuParamDataDesc& memberDesc, D3DXCONSTANT_DESC& d3dDesc);
-
-		/**
-		 * @brief	Returns the name of the parameter with the specified constant table handle.
-		 */
-		String getParamName(D3DXHANDLE constant);
-
-	private:
-		LPD3DXCONSTANTTABLE mpConstTable;
-		Vector<D3D9EmulatedParamBlock> mBlocks;
-		GpuParamDescPtr mParamDesc;
-	};
-
-	String D3D9HLSLParamParser::getParamName(D3DXHANDLE constant)
-	{
-		D3DXCONSTANT_DESC desc;
-		UINT32 numParams = 1;
-		HRESULT hr = mpConstTable->GetConstantDesc(constant, &desc, &numParams);
-		if (FAILED(hr))
-		{
-			BS_EXCEPT(InternalErrorException, "Cannot retrieve constant description from HLSL program.");
-		}
-
-		String paramName = desc.Name;
-		// trim the odd '$' which appears at the start of the names in HLSL
-		if (paramName.at(0) == '$')
-			paramName.erase(paramName.begin());
-
-		// Also trim the '[0]' suffix if it exists, we will add our own indexing later
-		if (StringUtil::endsWith(paramName, "[0]", false))
-			paramName.erase(paramName.size() - 3);
-
-		return paramName;
-	}
-
-	GpuParamDescPtr D3D9HLSLParamParser::buildParameterDescriptions()
-	{
-		// Derive parameter names from const table
-		assert(mpConstTable && "Program not loaded!");
-
-		mParamDesc = bs_shared_ptr_new<GpuParamDesc>();
-
-		// Get contents of the constant table
-		D3DXCONSTANTTABLE_DESC desc;
-		HRESULT hr = mpConstTable->GetDesc(&desc);
-
-		if (FAILED(hr))
-			BS_EXCEPT(InternalErrorException, "Cannot retrieve constant descriptions from HLSL program.");
-
-		// DX9 has no concept of parameter blocks so we emulate them if needed
-		String name = "BS_INTERNAL_Globals";
-		mParamDesc->paramBlocks.insert(std::make_pair(name, GpuParamBlockDesc()));
-		GpuParamBlockDesc& globalBlockDesc = mParamDesc->paramBlocks[name];
-		globalBlockDesc.name = name;
-		globalBlockDesc.slot = 0;
-		globalBlockDesc.blockSize = 0;
-		globalBlockDesc.isShareable = false;
-
-		UnorderedMap<String, String> nonGlobalBlocks;
-		UINT32 curSlot = 1;
-		for (auto& block : mBlocks)
-		{
-			mParamDesc->paramBlocks.insert(std::make_pair(block.blockName, GpuParamBlockDesc()));
-			GpuParamBlockDesc& blockDesc = mParamDesc->paramBlocks[block.blockName];
-			blockDesc.name = block.blockName;
-			blockDesc.slot = curSlot++;
-			blockDesc.blockSize = 0;
-			blockDesc.isShareable = true;
-
-			for (auto& fieldName : block.paramNames)
-			{
-				nonGlobalBlocks.insert(std::make_pair(fieldName, block.blockName));
-			}
-		}
-
-		// Iterate over the constants
-		for (UINT32 i = 0; i < desc.Constants; ++i)
-		{
-			D3DXHANDLE constantHandle = mpConstTable->GetConstant(NULL, i);
-			String paramName = getParamName(constantHandle);
-
-			// Recursively descend through the structure levels
-			auto findIter = nonGlobalBlocks.find(paramName);
-			if (findIter == nonGlobalBlocks.end())
-				processParameter(globalBlockDesc, paramName, constantHandle, "");
-			else
-				processParameter(mParamDesc->paramBlocks[findIter->second], paramName, constantHandle, "");
-		}
-
-		return mParamDesc;
-	}
-
-	void D3D9HLSLParamParser::processParameter(GpuParamBlockDesc& blockDesc, const String& paramName, D3DXHANDLE constant, String prefix)
-	{
-		// Since D3D HLSL doesn't deal with naming of array and struct parameters
-		// automatically, we have to do it by hand
-
-		D3DXCONSTANT_DESC desc;
-		UINT32 numParams = 1;
-		HRESULT hr = mpConstTable->GetConstantDesc(constant, &desc, &numParams);
-		if (FAILED(hr))
-		{
-			BS_EXCEPT(InternalErrorException, "Cannot retrieve constant description from HLSL program.");
-		}
-
-		if (desc.Class == D3DXPC_STRUCT)
-		{
-			// work out a new prefix for nested members, if it's an array, we need an index
-			prefix = prefix + paramName + ".";
-			// Cascade into struct
-			for (UINT32 i = 0; i < desc.StructMembers; ++i)
-			{
-				D3DXHANDLE childHandle = mpConstTable->GetConstant(constant, i);
-				String childParamName = getParamName(childHandle);
-
-				processParameter(blockDesc, childParamName, childHandle, prefix);
-			}
-		}
-		else
-		{
-			// Process params
-			if (desc.Type == D3DXPT_FLOAT || desc.Type == D3DXPT_INT || desc.Type == D3DXPT_BOOL)
-			{
-				GpuParamDataDesc memberDesc;
-				memberDesc.gpuMemOffset = desc.RegisterIndex;
-				memberDesc.cpuMemOffset = blockDesc.blockSize;
-				memberDesc.paramBlockSlot = blockDesc.slot;
-				memberDesc.arraySize = 1;
-
-				String name = prefix + paramName;
-				memberDesc.name = name;
-
-				populateParamMemberDesc(memberDesc, desc);
-				mParamDesc->params.insert(std::make_pair(name, memberDesc));
-
-				blockDesc.blockSize += memberDesc.arrayElementStride * memberDesc.arraySize;
-			}
-			else if(desc.Type == D3DXPT_SAMPLER1D || desc.Type == D3DXPT_SAMPLER2D || desc.Type == D3DXPT_SAMPLER3D || desc.Type == D3DXPT_SAMPLERCUBE)
-			{
-				GpuParamObjectDesc samplerDesc;
-				samplerDesc.name = paramName;
-				samplerDesc.slot = desc.RegisterIndex;
-
-				GpuParamObjectDesc textureDesc;
-				textureDesc.name = paramName;
-				textureDesc.slot = desc.RegisterIndex;
-
-				switch(desc.Type)
-				{
-				case D3DXPT_SAMPLER1D:
-					samplerDesc.type = GPOT_SAMPLER1D;
-					textureDesc.type = GPOT_TEXTURE1D;
-					break;
-				case D3DXPT_SAMPLER2D:
-					samplerDesc.type = GPOT_SAMPLER2D;
-					textureDesc.type = GPOT_TEXTURE2D;
-					break;
-				case D3DXPT_SAMPLER3D:
-					samplerDesc.type = GPOT_SAMPLER3D;
-					textureDesc.type = GPOT_TEXTURE3D;
-					break;
-				case D3DXPT_SAMPLERCUBE:
-					samplerDesc.type = GPOT_SAMPLERCUBE;
-					textureDesc.type = GPOT_TEXTURECUBE;
-					break;
-				default:
-					BS_EXCEPT(InternalErrorException, "Invalid sampler type: " + toString(desc.Type) + " for parameter " + paramName);
-				}
-
-				mParamDesc->samplers.insert(std::make_pair(paramName, samplerDesc));
-				mParamDesc->textures.insert(std::make_pair(paramName, textureDesc));
-			}
-			else
-			{
-				BS_EXCEPT(InternalErrorException, "Invalid shader parameter type: " + toString(desc.Type) + " for parameter " + paramName);
-			}
-		}
-	}
-
-	void D3D9HLSLParamParser::populateParamMemberDesc(GpuParamDataDesc& memberDesc, D3DXCONSTANT_DESC& d3dDesc)
-	{
-		memberDesc.arraySize = d3dDesc.Elements;
-		switch(d3dDesc.Type)
-		{
-		case D3DXPT_INT:
-			switch(d3dDesc.Columns)
-			{
-			case 1:
-				memberDesc.type = GPDT_INT1;
-				break;
-			case 2:
-				memberDesc.type = GPDT_INT2;
-				break;
-			case 3:
-				memberDesc.type = GPDT_INT3;
-				break;
-			case 4:
-				memberDesc.type = GPDT_INT4;
-				break;
-			} 
-
-			memberDesc.elementSize = 4;
-			memberDesc.arrayElementStride = 4;
-
-			break;
-		case D3DXPT_FLOAT:
-			switch(d3dDesc.Class)
-			{
-			case D3DXPC_MATRIX_COLUMNS:
-			case D3DXPC_MATRIX_ROWS:
-				{
-					int firstDim, secondDim;
-					int firstActualDim; // Actual size might be less than requested because of optimization, we need to know both
-					firstActualDim = d3dDesc.RegisterCount / d3dDesc.Elements;
-
-					if (d3dDesc.Class == D3DXPC_MATRIX_ROWS)
-					{
-						firstDim = d3dDesc.Rows;
-						secondDim = d3dDesc.Columns;
-					}
-					else
-					{
-						firstDim = d3dDesc.Columns;
-						secondDim = d3dDesc.Rows;
-					}
-
-					switch (firstActualDim)
-					{
-					case 2:
-						memberDesc.elementSize = 8; // HLSL always padds regardless of row size
-						memberDesc.arrayElementStride = 8;
-						break;
-					case 3:
-						memberDesc.elementSize = 12; // HLSL always padds regardless of row size
-						memberDesc.arrayElementStride = 12;
-						break;
-					case 4:
-						memberDesc.elementSize = 16; // HLSL always padds regardless of row size
-						memberDesc.arrayElementStride = 16;
-						break;
-
-					}
-
-					switch (firstDim)
-					{
-					case 2:
-						switch (secondDim)
-						{
-						case 2:
-							memberDesc.type = GPDT_MATRIX_2X2;
-							break;
-						case 3:
-							memberDesc.type = GPDT_MATRIX_2X3;
-							break;
-						case 4:
-							memberDesc.type = GPDT_MATRIX_2X4;
-							break;
-						} 
-						break;
-					case 3:
-						switch (secondDim)
-						{
-						case 2:
-							memberDesc.type = GPDT_MATRIX_3X2;
-							break;
-						case 3:
-							memberDesc.type = GPDT_MATRIX_3X3;
-							break;
-						case 4:
-							memberDesc.type = GPDT_MATRIX_3X4;
-							break;
-						} 
-						break;
-					case 4:
-						switch (secondDim)
-						{
-						case 2:
-							memberDesc.type = GPDT_MATRIX_4X2;
-							break;
-						case 3:
-							memberDesc.type = GPDT_MATRIX_4X3;
-							break;
-						case 4:
-							memberDesc.type = GPDT_MATRIX_4X4;
-							break;
-						}
-						break;
-					}
-				}
-				break;
-			case D3DXPC_SCALAR:
-			case D3DXPC_VECTOR:
-				switch(d3dDesc.Columns)
-				{
-				case 1:
-					memberDesc.type = GPDT_FLOAT1;
-					break;
-				case 2:
-					memberDesc.type = GPDT_FLOAT2;
-					break;
-				case 3:
-					memberDesc.type = GPDT_FLOAT3;
-					break;
-				case 4:
-					memberDesc.type = GPDT_FLOAT4;
-					break;
-				}
-
-				memberDesc.elementSize = 4;
-				memberDesc.arrayElementStride = 4;
-
-				break;
-			}
-			break;
-		case D3DXPT_BOOL:
-			memberDesc.type = GPDT_BOOL;
-			memberDesc.elementSize = 4;
-			memberDesc.arrayElementStride = 4;
-			break;
-		default:
-			break;
-		};
-	}
+#pragma once
+
+#include "BsD3D9Prerequisites.h"
+#include "BsD3D9GpuProgram.h"
+#include "BsGpuParamDesc.h"
+
+namespace BansheeEngine
+{
+	/**
+	 * @brief	Helper class that parses GPU program constant table and returns parameters used 
+	 *			by the program, as well as their type, size and other information.
+	 */
+	class D3D9HLSLParamParser
+	{
+	public:
+		/**
+		 * @brief	Initializes the parameter parser with the specified constant table, and an optional list
+		 *			of parameter blocks. DirectX 9 does not support parameter blocks internally, but
+		 *			we can emulate the functionality by providing a list of user-defined blocks and
+		 *			the parameters they contain.
+		 *
+		 * @param	constTable	Constant table to read the parameters from. Stored as-is, not copied.
+		 * @param	blocks		Emulated parameter blocks. The list is copied.
+		 */
+		D3D9HLSLParamParser(LPD3DXCONSTANTTABLE constTable, const Vector<D3D9EmulatedParamBlock>& blocks)
+			:mpConstTable(constTable), mBlocks(blocks)
+		{ }
+
+		/**
+		 * @brief	Builds parameter descriptions and returns an object containing all relevant information.
+		 */
+		GpuParamDescPtr buildParameterDescriptions();
+
+	private:
+		/**
+		 * @brief	Determines information about the specified parameter and places it in the provided 
+		 *			parameter block, as well as any children of the parameter.
+		 *
+		 * @param	blockDesc	Parent block into which to add the new parameter.
+		 * @param	paramName	Name of the parameter.
+		 * @param	constant	Parameter handle in the constant table.
+		 * @param	prefix		Prefix to prepend to the name of the parameter and of any child parameters.	
+		 */
+		void processParameter(GpuParamBlockDesc& blockDesc, const String& paramName, D3DXHANDLE constant, String prefix);
+
+		/**
+		 * @brief	Populates information about the parameter in "memberDesc" from the data in d3dDesc. Essentially
+		 *			converts DX9 parameter data to engine data.
+		 */
+		void populateParamMemberDesc(GpuParamDataDesc& memberDesc, D3DXCONSTANT_DESC& d3dDesc);
+
+		/**
+		 * @brief	Returns the name of the parameter with the specified constant table handle.
+		 */
+		String getParamName(D3DXHANDLE constant);
+
+	private:
+		LPD3DXCONSTANTTABLE mpConstTable; // Constant table the parameters are read from
+		Vector<D3D9EmulatedParamBlock> mBlocks; // User-defined blocks used for emulating parameter blocks
+		GpuParamDescPtr mParamDesc; // Output being built, created in buildParameterDescriptions()
+	};
+
+	/**
+	 * Looks up the constant with the provided handle and returns its name, with the leading '$' and a trailing
+	 * "[0]" removed if present. Returns an empty string if the constant has no name.
+	 */
+	String D3D9HLSLParamParser::getParamName(D3DXHANDLE constant)
+	{
+		D3DXCONSTANT_DESC desc;
+		UINT32 numParams = 1;
+		HRESULT hr = mpConstTable->GetConstantDesc(constant, &desc, &numParams);
+		if (FAILED(hr))
+		{
+			BS_EXCEPT(InternalErrorException, "Cannot retrieve constant description from HLSL program.");
+		}
+
+		// Constructing a string from a null pointer is undefined, so guard against a missing name
+		String paramName = (desc.Name != nullptr) ? desc.Name : "";
+
+		// trim the odd '$' which appears at the start of the names in HLSL
+		// (checked for emptiness first, at(0) would throw on an empty name)
+		if (!paramName.empty() && paramName[0] == '$')
+			paramName.erase(paramName.begin());
+
+		// Also trim the '[0]' suffix if it exists, we will add our own indexing later
+		if (StringUtil::endsWith(paramName, "[0]", false))
+			paramName.erase(paramName.size() - 3);
+
+		return paramName;
+	}
+
+	/**
+	 * Walks over all top-level constants in the constant table and records them in a newly created parameter
+	 * description. Constants listed in one of the emulated blocks are placed in that block, all others end up in
+	 * the internal global block.
+	 */
+	GpuParamDescPtr D3D9HLSLParamParser::buildParameterDescriptions()
+	{
+		// Derive parameter names from const table
+		assert(mpConstTable && "Program not loaded!");
+
+		mParamDesc = bs_shared_ptr_new<GpuParamDesc>();
+
+		// Get contents of the constant table
+		D3DXCONSTANTTABLE_DESC tableDesc;
+		if (FAILED(mpConstTable->GetDesc(&tableDesc)))
+			BS_EXCEPT(InternalErrorException, "Cannot retrieve constant descriptions from HLSL program.");
+
+		// DX9 has no concept of parameter blocks so we emulate them if needed. The global block always takes slot 0.
+		String globalName = "BS_INTERNAL_Globals";
+		mParamDesc->paramBlocks.insert(std::make_pair(globalName, GpuParamBlockDesc()));
+
+		GpuParamBlockDesc& globalBlockDesc = mParamDesc->paramBlocks[globalName];
+		globalBlockDesc.name = globalName;
+		globalBlockDesc.slot = 0;
+		globalBlockDesc.blockSize = 0;
+		globalBlockDesc.isShareable = false;
+
+		// Maps a parameter name to the name of the emulated block that contains it
+		UnorderedMap<String, String> paramToBlock;
+		UINT32 nextSlot = 1;
+		for (auto& emulatedBlock : mBlocks)
+		{
+			mParamDesc->paramBlocks.insert(std::make_pair(emulatedBlock.blockName, GpuParamBlockDesc()));
+
+			GpuParamBlockDesc& emulatedDesc = mParamDesc->paramBlocks[emulatedBlock.blockName];
+			emulatedDesc.name = emulatedBlock.blockName;
+			emulatedDesc.slot = nextSlot++;
+			emulatedDesc.blockSize = 0;
+			emulatedDesc.isShareable = true;
+
+			for (auto& memberName : emulatedBlock.paramNames)
+				paramToBlock.insert(std::make_pair(memberName, emulatedBlock.blockName));
+		}
+
+		// Iterate over the constants
+		for (UINT32 constantIdx = 0; constantIdx < tableDesc.Constants; ++constantIdx)
+		{
+			D3DXHANDLE constantHandle = mpConstTable->GetConstant(NULL, constantIdx);
+			String paramName = getParamName(constantHandle);
+
+			// Pick the owning block, then recursively descend through the structure levels
+			auto blockIter = paramToBlock.find(paramName);
+			GpuParamBlockDesc& owningBlock = (blockIter == paramToBlock.end())
+				? globalBlockDesc
+				: mParamDesc->paramBlocks[blockIter->second];
+
+			processParameter(owningBlock, paramName, constantHandle, "");
+		}
+
+		return mParamDesc;
+	}
+
+	/**
+	 * Records a single constant in the parameter description being built. Structures are expanded recursively,
+	 * float/int/bool constants are added as data parameters to the provided block, and samplers are added as a
+	 * sampler and texture pair. Any other constant type raises an InternalErrorException.
+	 */
+	void D3D9HLSLParamParser::processParameter(GpuParamBlockDesc& blockDesc, const String& paramName, D3DXHANDLE constant, String prefix)
+	{
+		// Since D3D HLSL doesn't deal with naming of array and struct parameters
+		// automatically, we have to do it by hand
+
+		D3DXCONSTANT_DESC desc;
+		UINT32 numParams = 1;
+		HRESULT hr = mpConstTable->GetConstantDesc(constant, &desc, &numParams);
+		if (FAILED(hr))
+		{
+			BS_EXCEPT(InternalErrorException, "Cannot retrieve constant description from HLSL program.");
+		}
+
+		if (desc.Class == D3DXPC_STRUCT)
+		{
+			// work out a new prefix for nested members, if it's an array, we need an index
+			prefix = prefix + paramName + ".";
+			// Cascade into struct
+			for (UINT32 i = 0; i < desc.StructMembers; ++i)
+			{
+				D3DXHANDLE childHandle = mpConstTable->GetConstant(constant, i);
+				String childParamName = getParamName(childHandle);
+
+				processParameter(blockDesc, childParamName, childHandle, prefix);
+			}
+		}
+		else
+		{
+			// Process params
+			if (desc.Type == D3DXPT_FLOAT || desc.Type == D3DXPT_INT || desc.Type == D3DXPT_BOOL)
+			{
+				GpuParamDataDesc memberDesc;
+				memberDesc.gpuMemOffset = desc.RegisterIndex;
+				// Parameter is placed at the current end of the block
+				memberDesc.cpuMemOffset = blockDesc.blockSize;
+				memberDesc.paramBlockSlot = blockDesc.slot;
+				// Placeholder, overwritten with the actual element count by populateParamMemberDesc() below
+				memberDesc.arraySize = 1;
+
+				// Data parameters are registered under their fully qualified name
+				String name = prefix + paramName;
+				memberDesc.name = name;
+
+				populateParamMemberDesc(memberDesc, desc);
+				mParamDesc->params.insert(std::make_pair(name, memberDesc));
+
+				blockDesc.blockSize += memberDesc.arrayElementStride * memberDesc.arraySize;
+			}
+			else if(desc.Type == D3DXPT_SAMPLER1D || desc.Type == D3DXPT_SAMPLER2D || desc.Type == D3DXPT_SAMPLER3D || desc.Type == D3DXPT_SAMPLERCUBE)
+			{
+				// Each sampler constant yields both a sampler and a texture entry sharing the same name and slot.
+				// Note the prefix is not applied to these names.
+				GpuParamObjectDesc samplerDesc;
+				samplerDesc.name = paramName;
+				samplerDesc.slot = desc.RegisterIndex;
+
+				GpuParamObjectDesc textureDesc;
+				textureDesc.name = paramName;
+				textureDesc.slot = desc.RegisterIndex;
+
+				switch(desc.Type)
+				{
+				case D3DXPT_SAMPLER1D:
+					samplerDesc.type = GPOT_SAMPLER1D;
+					textureDesc.type = GPOT_TEXTURE1D;
+					break;
+				case D3DXPT_SAMPLER2D:
+					samplerDesc.type = GPOT_SAMPLER2D;
+					textureDesc.type = GPOT_TEXTURE2D;
+					break;
+				case D3DXPT_SAMPLER3D:
+					samplerDesc.type = GPOT_SAMPLER3D;
+					textureDesc.type = GPOT_TEXTURE3D;
+					break;
+				case D3DXPT_SAMPLERCUBE:
+					samplerDesc.type = GPOT_SAMPLERCUBE;
+					textureDesc.type = GPOT_TEXTURECUBE;
+					break;
+				default:
+					BS_EXCEPT(InternalErrorException, "Invalid sampler type: " + toString(desc.Type) + " for parameter " + paramName);
+				}
+
+				mParamDesc->samplers.insert(std::make_pair(paramName, samplerDesc));
+				mParamDesc->textures.insert(std::make_pair(paramName, textureDesc));
+			}
+			else
+			{
+				BS_EXCEPT(InternalErrorException, "Invalid shader parameter type: " + toString(desc.Type) + " for parameter " + paramName);
+			}
+		}
+	}
+
+	void D3D9HLSLParamParser::populateParamMemberDesc(GpuParamDataDesc& memberDesc, D3DXCONSTANT_DESC& d3dDesc)
+	{ // Fills memberDesc's type, arraySize, elementSize and arrayElementStride from the D3DX constant description
+		memberDesc.arraySize = d3dDesc.Elements; // set for every type, including ones the switch below ignores
+		switch(d3dDesc.Type)
+		{
+		case D3DXPT_INT:
+			switch(d3dDesc.Columns)
+			{
+			case 1:
+				memberDesc.type = GPDT_INT1;
+				break;
+			case 2:
+				memberDesc.type = GPDT_INT2;
+				break;
+			case 3:
+				memberDesc.type = GPDT_INT3;
+				break;
+			case 4:
+				memberDesc.type = GPDT_INT4;
+				break;
+			} // NOTE(review): any other column count leaves memberDesc.type unassigned by this function
+			// Size and stride are 4 for every int column count (the unit is not stated in this function)
+			memberDesc.elementSize = 4;
+			memberDesc.arrayElementStride = 4;
+
+			break;
+		case D3DXPT_FLOAT:
+			switch(d3dDesc.Class)
+			{
+			case D3DXPC_MATRIX_COLUMNS:
+			case D3DXPC_MATRIX_ROWS:
+				{
+					int firstDim, secondDim;
+					int firstActualDim; // Actual size might be less than requested because of optimization, we need to know both
+					firstActualDim = d3dDesc.RegisterCount / d3dDesc.Elements; // registers per array element; assumes Elements != 0 - TODO confirm
+
+					if (d3dDesc.Class == D3DXPC_MATRIX_ROWS)
+					{
+						firstDim = d3dDesc.Rows;
+						secondDim = d3dDesc.Columns;
+					}
+					else
+					{ // D3DXPC_MATRIX_COLUMNS: the two declared dimensions are swapped
+						firstDim = d3dDesc.Columns;
+						secondDim = d3dDesc.Rows;
+					}
+					// Element size and stride are derived from the actual register count, not the declared dimensions
+					switch (firstActualDim)
+					{
+					case 2:
+						memberDesc.elementSize = 8; // HLSL always pads regardless of row size
+						memberDesc.arrayElementStride = 8;
+						break;
+					case 3:
+						memberDesc.elementSize = 12; // HLSL always pads regardless of row size
+						memberDesc.arrayElementStride = 12;
+						break;
+					case 4:
+						memberDesc.elementSize = 16; // HLSL always pads regardless of row size
+						memberDesc.arrayElementStride = 16;
+						break;
+					// NOTE(review): any other value (e.g. 1) leaves elementSize and arrayElementStride unassigned by this function
+					}
+					// The reported matrix type is chosen from the declared dimensions (firstDim x secondDim)
+					switch (firstDim)
+					{
+					case 2:
+						switch (secondDim)
+						{
+						case 2:
+							memberDesc.type = GPDT_MATRIX_2X2;
+							break;
+						case 3:
+							memberDesc.type = GPDT_MATRIX_2X3;
+							break;
+						case 4:
+							memberDesc.type = GPDT_MATRIX_2X4;
+							break;
+						} 
+						break;
+					case 3:
+						switch (secondDim)
+						{
+						case 2:
+							memberDesc.type = GPDT_MATRIX_3X2;
+							break;
+						case 3:
+							memberDesc.type = GPDT_MATRIX_3X3;
+							break;
+						case 4:
+							memberDesc.type = GPDT_MATRIX_3X4;
+							break;
+						} 
+						break;
+					case 4:
+						switch (secondDim)
+						{
+						case 2:
+							memberDesc.type = GPDT_MATRIX_4X2;
+							break;
+						case 3:
+							memberDesc.type = GPDT_MATRIX_4X3;
+							break;
+						case 4:
+							memberDesc.type = GPDT_MATRIX_4X4;
+							break;
+						}
+						break;
+					}
+				}
+				break;
+			case D3DXPC_SCALAR:
+			case D3DXPC_VECTOR:
+				switch(d3dDesc.Columns)
+				{
+				case 1:
+					memberDesc.type = GPDT_FLOAT1;
+					break;
+				case 2:
+					memberDesc.type = GPDT_FLOAT2;
+					break;
+				case 3:
+					memberDesc.type = GPDT_FLOAT3;
+					break;
+				case 4:
+					memberDesc.type = GPDT_FLOAT4;
+					break;
+				}
+				// Scalars and vectors get size and stride 4 regardless of column count
+				memberDesc.elementSize = 4;
+				memberDesc.arrayElementStride = 4;
+
+				break;
+			} // float constants of any other class get only arraySize from this function
+			break;
+		case D3DXPT_BOOL:
+			memberDesc.type = GPDT_BOOL;
+			memberDesc.elementSize = 4;
+			memberDesc.arrayElementStride = 4;
+			break;
+		default: // any other D3DX type: only arraySize has been set
+			break;
+		};
+	}
 }

+ 1 - 3
BansheeGLRenderAPI/Source/BsGLRenderAPI.cpp

@@ -1,15 +1,12 @@
 #include "BsGLRenderAPI.h"
 #include "BsRenderAPI.h"
 #include "BsGLTextureManager.h"
-#include "BsGLVertexBuffer.h"
 #include "BsGLIndexBuffer.h"
 #include "BsGLUtil.h"
 #include "BsGLSLGpuProgram.h"
 #include "BsException.h"
-#include "BsGLOcclusionQuery.h"
 #include "BsGLContext.h"
 #include "BsGLSupport.h"
-#include "BsAsyncOp.h"
 #include "BsBlendState.h"
 #include "BsRasterizerState.h"
 #include "BsDepthStencilState.h"
@@ -24,6 +21,7 @@
 #include "BsGLQueryManager.h"
 #include "BsDebug.h"
 #include "BsRenderStats.h"
+#include "BsGpuParamDesc.h"
 
 namespace BansheeEngine 
 {

+ 6 - 0
BansheeUtility/Include/BsPrerequisitesUtil.h

@@ -62,6 +62,12 @@
 
 /** @} */
 
+/** @defgroup Implementation Implementation
+ *	Contains various base and helper types that are used by an implementation of some other type. These shouldn't even be part
+ *  of the class list but due to limitations in the documentation generation system they need to be somewhere. All elements 
+ *  listed here should instead be found by browsing the public interfaces of the types that use them.
+ */
+
 // 0 - No thread support
 // 1 - Render system is thread safe (TODO: NOT WORKING and will probably be removed)
 // 2 - Thread support but render system can only be accessed from main thread