12 年之前 · e7e420a0a6
--- a/CamelotCore/Include/CmCoreThreadAccessor.h
+++ b/CamelotCore/Include/CmCoreThreadAccessor.h
@@ -186,15 +186,15 @@ namespace CamelotFramework
 
				 		}

			
 
				 

			
 
				 		/** @copydoc RenderSystem::draw() */

			
 
				-		void draw(UINT32 vertexCount)

			
 
				+		void draw(UINT32 vertexOffset, UINT32 vertexCount)

			
 
				 		{

			
 
				-			mCommandQueue->queue(boost::bind(&RenderSystem::draw, RenderSystem::instancePtr(), vertexCount));

			
 
				+			mCommandQueue->queue(boost::bind(&RenderSystem::draw, RenderSystem::instancePtr(), vertexOffset, vertexCount));

			
 
				 		}

			
 
				 

			
 
				 		/** @copydoc RenderSystem::drawIndexed() */

			
 
				-		void drawIndexed(UINT32 startIndex, UINT32 indexCount, UINT32 vertexCount)

			
 
				+		void drawIndexed(UINT32 startIndex, UINT32 indexCount, UINT32 vertexOffset, UINT32 vertexCount)

			
 
				 		{

			
 
				-			mCommandQueue->queue(boost::bind(&RenderSystem::drawIndexed, RenderSystem::instancePtr(), startIndex, indexCount, vertexCount));

			
 
				+			mCommandQueue->queue(boost::bind(&RenderSystem::drawIndexed, RenderSystem::instancePtr(), startIndex, indexCount, vertexOffset, vertexCount));

			
 
				 		}

			
 
				 

			
 
				 		/**

			
--- a/CamelotCore/Include/CmMeshData.h
+++ b/CamelotCore/Include/CmMeshData.h
@@ -173,6 +173,7 @@ namespace CamelotFramework
 
				 

			
 
				 	private:

			
 
				 		friend class Mesh; // To avoid polluting the public interface with a bunch of nearly useless methods for outside world

			
 
				+		friend class MeshHeap;

			
 
				 

			
 
				 		UINT32 mDescBuilding;

			
 
				 

			
--- a/CamelotCore/Include/CmMeshHeap.h
+++ b/CamelotCore/Include/CmMeshHeap.h
@@ -1,11 +1,138 @@
 
				 #pragma once

			
 
				 

			
 
				 #include "CmPrerequisites.h"

			
 
				+#include "CmCoreObject.h"

			
 
				+#include "CmIndexData.h"

			
 
				+#include "CmDrawOps.h"

			
 
				 

			
 
				 namespace CamelotFramework

			
 
				 {

			
 
				-	class CM_EXPORT MeshHeap

			
 
				+	/**

			
 
				+	 * @brief	Mesh heap allows you to quickly allocate and deallocate

			
 
				+	 * 			large amounts of temporary meshes without the large overhead of normal Mesh creation.

			
 
				+	 * 			Only requirement is that meshes share the same vertex description and index type.

			
 
				+	 * 			

			
 
				+	 * @note	This class should be considered as a replacement for a normal Mesh if you are constantly 

			
 
				+	 * 			updating the mesh (e.g. every frame) and you are not able to discard entire mesh contents 

			
 
				+	 * 			on each update. Not using discard flag on normal meshes may introduce GPU-CPU sync points

			
 
				+	 * 			which may severely limit performance. Primary purpose of this class is to avoid

			
 
				+	 * 			those sync points by not forcing you to discard contents.

			
 
				+	 * 			

			
 
				+	 *			Only downside is that this class may allocate 2-3x (or more) the memory that is actually needed

			
 
				+	 *			for your data.

			
 
				+	 *			

			
 
				+	 *			Sim thread only.

			
 
				+	 */

			
 
				+	class CM_EXPORT MeshHeap : public CoreObject

			
 
				 	{

			
 
				+		enum class UseFlags

			
 
				+		{

			
 
				+			Used,

			
 
				+			CPUFree,

			
 
				+			GPUFree,

			
 
				+			Free

			
 
				+		};

			
 
				 

			
 
				+		struct ChunkData

			
 
				+		{

			
 
				+			UINT32 start, size;

			
 
				+		};

			
 
				+

			
 
				+		struct AllocatedData

			
 
				+		{

			
 
				+			UINT32 vertChunkIdx;

			
 
				+			UINT32 idxChunkIdx;

			
 
				+

			
 
				+			UseFlags useFlags;

			
 
				+			UINT32 eventQueryIdx;

			
 
				+		};

			
 
				+

			
 
				+		struct QueryData

			
 
				+		{

			
 
				+			EventQueryPtr query;

			
 
				+			UINT32 queryId;

			
 
				+		};

			
 
				+

			
 
				+	public:

			
 
				+		~MeshHeap();

			
 
				+

			
 
				+		/**

			
 
				+		 * @note	Offsets provided by MeshData are ignored. MeshHeap will determine

			
 
				+		 * 			where the data will be written internally.

			
 
				+		 */

			
 
				+		TransientMeshPtr alloc(const MeshDataPtr& meshData, DrawOperationType drawOp = DOT_TRIANGLE_LIST);

			
 
				+		void dealloc(const TransientMeshPtr& mesh);

			
 
				+

			
 
				+		static MeshHeapPtr create(UINT32 numVertices, UINT32 numIndices, 

			
 
				+			const VertexDataDescPtr& vertexDesc, IndexBuffer::IndexType indexType = IndexBuffer::IT_32BIT);

			
 
				+

			
 
				+	private:

			
 
				+		UINT32 mNumVertices; // Core thread

			
 
				+		UINT32 mNumIndices; // Core thread

			
 
				+

			
 
				+		std::shared_ptr<VertexData> mVertexData; // Core thread

			
 
				+		std::shared_ptr<IndexData> mIndexData; // Core thread

			
 
				+

			
 
				+		Vector<UINT8*>::type mCPUVertexData; // Core thread

			
 
				+		UINT8* mCPUIndexData; // Core thread

			
 
				+

			
 
				+		VertexDataDescPtr mVertexDesc; // Immutable

			
 
				+		IndexBuffer::IndexType mIndexType; // Immutable

			
 
				+

			
 
				+		Map<UINT32, TransientMeshPtr>::type mMeshes; // Sim thread

			
 
				+		UINT32 mNextFreeId; // Sim thread

			
 
				+

			
 
				+		Map<UINT32, AllocatedData>::type mMeshAllocData; // Core thread

			
 
				+

			
 
				+		Vector<ChunkData>::type mVertChunks; // Core thread

			
 
				+		Vector<ChunkData>::type mIdxChunks; // Core thread

			
 
				+

			
 
				+		Stack<UINT32>::type mEmptyVertChunks; // Core thread

			
 
				+		Stack<UINT32>::type mEmptyIdxChunks; // Core thread

			
 
				+

			
 
				+		List<UINT32>::type mFreeVertChunks; // Core thread

			
 
				+		List<UINT32>::type mFreeIdxChunks; // Core thread

			
 
				+

			
 
				+		Vector<QueryData>::type mEventQueries; // Core thread

			
 
				+		Stack<UINT32>::type mFreeEventQueries; // Core thread

			
 
				+

			
 
				+		UINT32 mNextQueryId;

			
 
				+

			
 
				+		static const float GrowPercent;

			
 
				+	private:

			
 
				+		friend class TransientMesh;

			
 
				+

			
 
				+		MeshHeap(UINT32 numVertices, UINT32 numIndices, 

			
 
				+			const VertexDataDescPtr& vertexDesc, IndexBuffer::IndexType indexType = IndexBuffer::IT_32BIT);

			
 
				+

			
 
				+		/**

			
 
				+		 * @copydoc CoreObject::initialize_internal()

			
 
				+		 */

			
 
				+		virtual void initialize_internal();

			
 
				+

			
 
				+		/**

			
 
				+		 * @copydoc CoreObject::destroy_internal()

			
 
				+		 */

			
 
				+		virtual void destroy_internal();

			
 
				+

			
 
				+		void allocInternal(UINT32 meshId, const MeshDataPtr& meshData);

			
 
				+		void deallocInternal(UINT32 meshId);

			
 
				+

			
 
				+		void growVertexBuffer(UINT32 numVertices);

			
 
				+		void growIndexBuffer(UINT32 numIndices);

			
 
				+

			
 
				+		UINT32 createEventQuery();

			
 
				+		void freeEventQuery(UINT32 idx);

			
 
				+

			
 
				+		std::shared_ptr<VertexData> getVertexData() const;

			
 
				+		std::shared_ptr<IndexData> getIndexData() const;

			
 
				+

			
 
				+		UINT32 getVertexOffset(UINT32 meshId) const;

			
 
				+		UINT32 getIndexOffset(UINT32 meshId) const;

			
 
				+

			
 
				+		void notifyUsedOnGPU(UINT32 meshId);

			
 
				+		void queryTriggered(UINT32 meshId, UINT32 queryId);

			
 
				+

			
 
				+		void mergeWithNearbyChunks(UINT32 chunkVertIdx, UINT32 chunkIdxIdx);

			
 
				 	};

			
 
				 }
			
--- a/CamelotCore/Include/CmRenderSystem.h
+++ b/CamelotCore/Include/CmRenderSystem.h
@@ -200,14 +200,14 @@ namespace CamelotFramework
 
				 		 *			Draws directly from the vertex buffer without using

			
 
				 		 *			indices.

			
 
				 		 */

			
 
				-		virtual void draw(UINT32 vertexCount) = 0;

			
 
				+		virtual void draw(UINT32 vertexOffset, UINT32 vertexCount) = 0;

			
 
				 

			
 
				 		/**

			
 
				 		 * @brief	Draw an object based on currently set

			
 
				 		 * 			shaders, vertex declaration and vertex 

			
 
				 		 * 			and index buffers.

			
 
				 		 */

			
 
				-		virtual void drawIndexed(UINT32 startIndex, UINT32 indexCount, UINT32 vertexCount) = 0;

			
 
				+		virtual void drawIndexed(UINT32 startIndex, UINT32 indexCount, UINT32 vertexOffset, UINT32 vertexCount) = 0;

			
 
				 

			
 
				 		/**

			
 
				 		 * @brief	Swap the front and back buffer of the specified render target.

			
--- a/CamelotCore/Include/CmVertexDataDesc.h
+++ b/CamelotCore/Include/CmVertexDataDesc.h
@@ -43,6 +43,7 @@ namespace CamelotFramework
 
				 		const VertexElement& getElement(UINT32 idx) const { return mVertexElements[idx]; }

			
 
				 	private:

			
 
				 		friend class Mesh; // To avoid polluting the public interface with a bunch of nearly useless methods for outside world

			
 
				+		friend class MeshHeap;

			
 
				 

			
 
				 		Vector<VertexElement>::type mVertexElements;

			
 
				 

			
--- a/CamelotCore/Source/CmMesh.cpp
+++ b/CamelotCore/Source/CmMesh.cpp
@@ -116,7 +116,7 @@ namespace CamelotFramework
 
				 					if(!meshData.getVertexDesc()->hasElement(VES_COLOR, semanticIdx, i))

			
 
				 						continue;

			
 
				 

			
 
				-					UINT8* colorData = bufferCopy + meshData.getElementOffset(VES_COLOR, semanticIdx, i);

			
 
				+					UINT8* colorData = bufferCopy + mVertexDesc->getElementOffsetFromStream(VES_COLOR, semanticIdx, i);

			
 
				 					for(UINT32 j = 0; j < mVertexData->vertexCount; j++)

			
 
				 					{

			
 
				 						UINT32* curColor = (UINT32*)colorData;

			
--- a/CamelotCore/Source/CmMeshHeap.cpp
+++ b/CamelotCore/Source/CmMeshHeap.cpp
@@ -1,6 +1,633 @@
 
				 #include "CmMeshHeap.h"

			
 
				+#include "CmCoreThread.h"

			
 
				+#include "CmTransientMesh.h"

			
 
				+#include "CmHardwareBufferManager.h"

			
 
				+#include "CmVertexDataDesc.h"

			
 
				+#include "CmVertexData.h"

			
 
				+#include "CmIndexData.h"

			
 
				+#include "CmMeshData.h"

			
 
				+#include "CmMath.h"

			
 
				+#include "CmEventQuery.h"

			
 
				 

			
 
				 namespace CamelotFramework

			
 
				 {

			
 
				+	// Multiplier applied (repeatedly) to the current buffer capacity by allocInternal()
				+	// until a pending allocation fits.
				+	const float MeshHeap::GrowPercent = 1.5f;

			
 
				 

			
 
				+	/**
				+	 * Records the requested capacities, vertex layout and index type, and reserves one
				+	 * (initially null) CPU shadow-buffer slot per vertex stream index. The actual buffers
				+	 * are created later by initialize_internal().
				+	 */
				+	MeshHeap::MeshHeap(UINT32 numVertices, UINT32 numIndices,
				+		const VertexDataDescPtr& vertexDesc, IndexBuffer::IndexType indexType)
				+		:mNumVertices(numVertices), mNumIndices(numIndices), mCPUIndexData(nullptr),
				+		mVertexDesc(vertexDesc), mIndexType(indexType), mNextFreeId(0),
				+		mNextQueryId(0)
				+	{
				+		// Slots are indexed directly by stream index, so cover [0, maxStreamIdx] inclusive.
				+		const UINT32 numStreamSlots = mVertexDesc->getMaxStreamIdx() + 1;
				+		mCPUVertexData.assign(numStreamSlots, nullptr);
				+	}

			
 
				+

			
 
				+	/**
				+	 * Destructor. The body is empty; nothing is released here.
				+	 * NOTE(review): core-thread resources are presumably expected to be torn down
				+	 * through destroy_internal() - confirm.
				+	 */
				+	MeshHeap::~MeshHeap()
				+	{
				+
				+	}

			
 
				+

			
 
				+	/**
				+	 * Factory: constructs a MeshHeap in memory obtained from cm_alloc, wraps it in a core
				+	 * pointer, registers the self pointer and kicks off initialization.
				+	 *
				+	 * @param numVertices	Initial vertex capacity of the heap.
				+	 * @param numIndices	Initial index capacity of the heap.
				+	 * @param vertexDesc	Vertex layout shared by every mesh allocated from the heap.
				+	 * @param indexType		Index type shared by every mesh allocated from the heap.
				+	 * @return				Pointer to the newly created heap.
				+	 */
				+	MeshHeapPtr MeshHeap::create(UINT32 numVertices, UINT32 numIndices,
				+		const VertexDataDescPtr& vertexDesc, IndexBuffer::IndexType indexType)
				+	{
				+		MeshHeapPtr heap = cm_core_ptr<MeshHeap, GenAlloc>(
				+			new (cm_alloc<MeshHeap>()) MeshHeap(numVertices, numIndices, vertexDesc, indexType));
				+
				+		heap->setThisPtr(heap);
				+		heap->initialize();
				+
				+		return heap;
				+	}

			
 
				+

			
 
				+	/**
				+	 * Core-thread half of initialization: creates the initial vertex and index buffers
				+	 * with the capacities passed to the constructor, then hands over to the base class.
				+	 */
				+	void MeshHeap::initialize_internal()
				+	{
				+		THROW_IF_NOT_CORE_THREAD;
				+
				+		growVertexBuffer(mNumVertices);
				+		growIndexBuffer(mNumIndices);
				+
				+		// Chain to the base implementation, mirroring destroy_internal() which already
				+		// chains to CoreObject::destroy_internal().
				+		// NOTE(review): the base call is presumably what marks the object as initialized - confirm.
				+		CoreObject::initialize_internal();
				+	}

			
 
				+

			
 
				+	/**
				+	 * Core-thread half of destruction: releases the CPU shadow copies of the vertex and
				+	 * index buffers (allocated with cm_alloc in growVertexBuffer()/growIndexBuffer()) and
				+	 * then hands over to the base class.
				+	 */
				+	void MeshHeap::destroy_internal()
				+	{
				+		THROW_IF_NOT_CORE_THREAD;
				+
				+		// Slots for streams the vertex description does not use were never allocated and stay null.
				+		for(auto& cpuVertBuffer : mCPUVertexData)
				+		{
				+			if(cpuVertBuffer != nullptr)
				+			{
				+				cm_free(cpuVertBuffer);
				+				cpuVertBuffer = nullptr;
				+			}
				+		}
				+
				+		if(mCPUIndexData != nullptr)
				+		{
				+			cm_free(mCPUIndexData);
				+			mCPUIndexData = nullptr;
				+		}
				+
				+		CoreObject::destroy_internal();
				+	}

			
 
				+

			
 
				+	/**
				+	 * Creates a TransientMesh handle for the provided data and queues the actual buffer
				+	 * allocation and upload (allocInternal) as a GPU command.
				+	 *
				+	 * @param meshData	Vertex/index data to place in the heap.
				+	 * @param drawOp	Draw operation type forwarded to the TransientMesh.
				+	 * @return			Handle to the newly registered mesh.
				+	 */
				+	TransientMeshPtr MeshHeap::alloc(const MeshDataPtr& meshData, DrawOperationType drawOp)
				+	{
				+		const UINT32 newMeshId = mNextFreeId++;
				+
				+		MeshHeapPtr self = std::static_pointer_cast<MeshHeap>(getThisPtr());
				+		TransientMeshPtr newMesh = cm_core_ptr<TransientMesh, GenAlloc>(
				+			new (cm_alloc<TransientMesh>()) TransientMesh(self, newMeshId, meshData->getNumVertices(), meshData->getNumIndices(), drawOp));
				+
				+		newMesh->setThisPtr(newMesh);
				+		newMesh->initialize();
				+
				+		// Keep the handle registered so dealloc() can recognise it later.
				+		mMeshes[newMeshId] = newMesh;
				+
				+		queueGpuCommand(getThisPtr(), boost::bind(&MeshHeap::allocInternal, this, newMeshId, meshData));
				+
				+		return newMesh;
				+	}

			
 
				+

			
 
				+	/**
				+	 * Unregisters a mesh previously returned by alloc() and queues the release of its
				+	 * buffer space (deallocInternal) as a GPU command. Meshes that are not registered
				+	 * with this heap are ignored.
				+	 */
				+	void MeshHeap::dealloc(const TransientMeshPtr& mesh)
				+	{
				+		auto registered = mMeshes.find(mesh->mId);
				+		if(registered != mMeshes.end())
				+		{
				+			mMeshes.erase(registered);
				+
				+			queueGpuCommand(getThisPtr(), boost::bind(&MeshHeap::deallocInternal, this, mesh->mId));
				+		}
				+	}

			
 
				+

			
 
				+	/**
				+	 * Reserves vertex and index space for a mesh and uploads its data.
				+	 *
				+	 * Picks the smallest free chunk that fits (growing and compacting the buffers when
				+	 * nothing fits), splits off any unused remainder as a new free chunk, records the
				+	 * allocation under meshId and copies the mesh data into both the CPU shadow buffers
				+	 * and the hardware buffers.
				+	 *
				+	 * @param meshId	Id the allocation is stored under in mMeshAllocData.
				+	 * @param meshData	Source data; it must use the heap's vertex description and index type.
				+	 */
				+	void MeshHeap::allocInternal(UINT32 meshId, const MeshDataPtr& meshData)
				+	{
				+		const UINT32 numVertices = meshData->getNumVertices();
				+		const UINT32 numIndices = meshData->getNumIndices();
				+
				+		// Find the smallest free vertex chunk that fits, growing the buffer if there is none
				+		auto bestVertIter = mFreeVertChunks.end();
				+		for(;;)
				+		{
				+			// growVertexBuffer() replaces the free list, so restart from a fresh end() every pass
				+			bestVertIter = mFreeVertChunks.end();
				+
				+			for(auto iter = mFreeVertChunks.begin(); iter != mFreeVertChunks.end(); ++iter)
				+			{
				+				const ChunkData& chunk = mVertChunks[*iter];
				+
				+				// Zero-sized entries are emptied slots left behind by mergeWithNearbyChunks()
				+				if(chunk.size == 0 || chunk.size < numVertices)
				+					continue;
				+
				+				if(bestVertIter == mFreeVertChunks.end() || chunk.size < mVertChunks[*bestVertIter].size)
				+					bestVertIter = iter;
				+			}
				+
				+			if(bestVertIter != mFreeVertChunks.end())
				+				break;
				+
				+			// Always ask for at least one extra element so that growing yields a usable free chunk
				+			const UINT32 requiredVertices = mNumVertices + (numVertices > 0 ? numVertices : 1);
				+
				+			UINT32 newNumVertices = mNumVertices;
				+			while(newNumVertices < requiredVertices)
				+			{
				+				const UINT32 grown = (UINT32)Math::RoundToInt(newNumVertices * GrowPercent);
				+
				+				// Multiplying a capacity of 0 (or 1, depending on rounding) makes no progress
				+				newNumVertices = (grown > newNumVertices) ? grown : requiredVertices;
				+			}
				+
				+			growVertexBuffer(newNumVertices);
				+		}
				+
				+		// Find the smallest free index chunk that fits, growing the buffer if there is none
				+		auto bestIdxIter = mFreeIdxChunks.end();
				+		for(;;)
				+		{
				+			bestIdxIter = mFreeIdxChunks.end();
				+
				+			for(auto iter = mFreeIdxChunks.begin(); iter != mFreeIdxChunks.end(); ++iter)
				+			{
				+				const ChunkData& chunk = mIdxChunks[*iter];
				+
				+				if(chunk.size == 0 || chunk.size < numIndices)
				+					continue;
				+
				+				if(bestIdxIter == mFreeIdxChunks.end() || chunk.size < mIdxChunks[*bestIdxIter].size)
				+					bestIdxIter = iter;
				+			}
				+
				+			if(bestIdxIter != mFreeIdxChunks.end())
				+				break;
				+
				+			const UINT32 requiredIndices = mNumIndices + (numIndices > 0 ? numIndices : 1);
				+
				+			UINT32 newNumIndices = mNumIndices;
				+			while(newNumIndices < requiredIndices)
				+			{
				+				const UINT32 grown = (UINT32)Math::RoundToInt(newNumIndices * GrowPercent);
				+				newNumIndices = (grown > newNumIndices) ? grown : requiredIndices;
				+			}
				+
				+			growIndexBuffer(newNumIndices);
				+		}
				+
				+		// Claim the chosen chunks
				+		const UINT32 freeVertChunkIdx = *bestVertIter;
				+		mFreeVertChunks.erase(bestVertIter);
				+
				+		const UINT32 freeIdxChunkIdx = *bestIdxIter;
				+		mFreeIdxChunks.erase(bestIdxIter);
				+
				+		// Work with copies of the extents: the push_back calls below may reallocate the chunk
				+		// vectors, which would invalidate any reference held into them.
				+		const UINT32 vertStart = mVertChunks[freeVertChunkIdx].start;
				+		const UINT32 idxStart = mIdxChunks[freeIdxChunkIdx].start;
				+
				+		const UINT32 remainingNumVerts = mVertChunks[freeVertChunkIdx].size - numVertices;
				+		const UINT32 remainingNumIdx = mIdxChunks[freeIdxChunkIdx].size - numIndices;
				+
				+		// Split off the unused tail of the vertex chunk as a free chunk
				+		if(remainingNumVerts > 0)
				+		{
				+			ChunkData remainder;
				+			remainder.start = vertStart + numVertices;
				+			remainder.size = remainingNumVerts;
				+
				+			if(!mEmptyVertChunks.empty())
				+			{
				+				// Recycled slots are still listed in mFreeVertChunks (mergeWithNearbyChunks() never
				+				// removes them), so giving the slot extents is enough to make it available again.
				+				const UINT32 emptyChunkIdx = mEmptyVertChunks.top();
				+				mEmptyVertChunks.pop();
				+
				+				mVertChunks[emptyChunkIdx] = remainder;
				+			}
				+			else
				+			{
				+				mVertChunks.push_back(remainder);
				+				mFreeVertChunks.push_back((UINT32)(mVertChunks.size() - 1));
				+			}
				+		}
				+
				+		// Split off the unused tail of the index chunk as a free chunk
				+		if(remainingNumIdx > 0)
				+		{
				+			ChunkData remainder;
				+			remainder.start = idxStart + numIndices;
				+			remainder.size = remainingNumIdx;
				+
				+			if(!mEmptyIdxChunks.empty())
				+			{
				+				const UINT32 emptyChunkIdx = mEmptyIdxChunks.top();
				+				mEmptyIdxChunks.pop();
				+
				+				mIdxChunks[emptyChunkIdx] = remainder;
				+			}
				+			else
				+			{
				+				mIdxChunks.push_back(remainder);
				+				mFreeIdxChunks.push_back((UINT32)(mIdxChunks.size() - 1));
				+			}
				+		}
				+
				+		mVertChunks[freeVertChunkIdx].size = numVertices;
				+		mIdxChunks[freeIdxChunkIdx].size = numIndices;
				+
				+		AllocatedData newAllocData;
				+		newAllocData.vertChunkIdx = freeVertChunkIdx;
				+		newAllocData.idxChunkIdx = freeIdxChunkIdx;
				+		newAllocData.useFlags = UseFlags::GPUFree;
				+		newAllocData.eventQueryIdx = createEventQuery();
				+
				+		mMeshAllocData[meshId] = newAllocData;
				+
				+		// Actually copy data
				+		for(UINT32 i = 0; i <= mVertexDesc->getMaxStreamIdx(); i++)
				+		{
				+			if(!mVertexDesc->hasStream(i))
				+				continue;
				+
				+			UINT32 vertSize = mVertexData->vertexDeclaration->getVertexSize(i);
				+			VertexBufferPtr vertexBuffer = mVertexData->getBuffer(i);
				+
				+			// Copy exactly this mesh's vertices to the chunk's position in the CPU shadow buffer
				+			UINT8* vertDest = mCPUVertexData[i] + vertStart * vertSize;
				+			memcpy(vertDest, meshData->getStreamData(i), numVertices * vertSize);
				+
				+			if(vertexBuffer->vertexColorReqRGBFlip())
				+			{
				+				UINT32 vertexStride = mVertexDesc->getVertexStride(i);
				+				for(INT32 semanticIdx = 0; semanticIdx < VertexBuffer::MAX_SEMANTIC_IDX; semanticIdx++)
				+				{
				+					if(!mVertexDesc->hasElement(VES_COLOR, semanticIdx, i))
				+						continue;
				+
				+					// Only touch the vertices that were just copied; other chunks hold other meshes
				+					UINT8* colorData = vertDest + mVertexDesc->getElementOffsetFromStream(VES_COLOR, semanticIdx, i);
				+					for(UINT32 j = 0; j < numVertices; j++)
				+					{
				+						UINT32* curColor = (UINT32*)colorData;
				+
				+						// Swap the lowest and third bytes, keep the other two
				+						(*curColor) = ((*curColor) & 0xFF00FF00) | ((*curColor >> 16) & 0x000000FF) | ((*curColor << 16) & 0x00FF0000);
				+
				+						colorData += vertexStride;
				+					}
				+				}
				+			}
				+
				+			vertexBuffer->writeData(vertStart * vertSize, numVertices * vertSize, vertDest, false);
				+		}
				+
				+		IndexBufferPtr indexBuffer = mIndexData->indexBuffer;
				+		UINT32 idxSize = indexBuffer->getIndexSize();
				+
				+		UINT8* idxDest = mCPUIndexData + idxStart * idxSize;
				+		memcpy(idxDest, meshData->getIndexData(), numIndices * idxSize);
				+		indexBuffer->writeData(idxStart * idxSize, numIndices * idxSize, idxDest, false);
				+	}

			
 
				+

			
 
				+	/**
				+	 * Releases a mesh's buffer space on behalf of dealloc().
				+	 *
				+	 * If the GPU is not using the mesh (GPUFree) its chunks go back to the free lists right
				+	 * away. If it is still in use (Used) the allocation is only flagged CPUFree and the
				+	 * release is completed by queryTriggered().
				+	 */
				+	void MeshHeap::deallocInternal(UINT32 meshId)
				+	{
				+		auto entry = mMeshAllocData.find(meshId);
				+		assert(entry != mMeshAllocData.end());
				+
				+		AllocatedData& alloc = entry->second;
				+		switch(alloc.useFlags)
				+		{
				+		case UseFlags::GPUFree:
				+			alloc.useFlags = UseFlags::Free;
				+			freeEventQuery(alloc.eventQueryIdx);
				+
				+			mFreeVertChunks.push_back(alloc.vertChunkIdx);
				+			mFreeIdxChunks.push_back(alloc.idxChunkIdx);
				+
				+			mergeWithNearbyChunks(alloc.vertChunkIdx, alloc.idxChunkIdx);
				+
				+			mMeshAllocData.erase(entry);
				+			break;
				+		case UseFlags::Used:
				+			alloc.useFlags = UseFlags::CPUFree;
				+			break;
				+		default:
				+			// CPUFree / Free: nothing to do
				+			break;
				+		}
				+	}

			
 
				+

			
 
				+	/**
				+	 * Replaces the vertex buffers with new ones holding numVertices vertices and compacts
				+	 * all live allocations to the front.
				+	 *
				+	 * For every stream in the vertex description a new GBU_DYNAMIC hardware buffer and a new
				+	 * CPU shadow buffer are created. Chunks of the allocations in mMeshAllocData are copied
				+	 * back to back (in map iteration order) into the new CPU buffer, which is then written to
				+	 * the hardware buffer. Afterwards the chunk table is rebuilt to match the compacted layout,
				+	 * with a single free chunk covering the remaining space.
				+	 *
				+	 * Also used for the initial creation, in which case there is no old data to copy.
				+	 *
				+	 * @param numVertices	New total capacity, in vertices.
				+	 */
				+	void MeshHeap::growVertexBuffer(UINT32 numVertices)
				+	{
				+		mNumVertices = numVertices;
				+		mVertexData = std::shared_ptr<VertexData>(cm_new<VertexData, PoolAlloc>());
				+
				+		mVertexData->vertexCount = mNumVertices;
				+		mVertexData->vertexDeclaration = mVertexDesc->createDeclaration();
				+
				+		// Create buffers and copy data
				+		for(UINT32 i = 0; i <= mVertexDesc->getMaxStreamIdx(); i++)
				+		{
				+			if(!mVertexDesc->hasStream(i))
				+				continue;
				+
				+			UINT32 vertSize = mVertexData->vertexDeclaration->getVertexSize(i);
				+			VertexBufferPtr vertexBuffer = HardwareBufferManager::instance().createVertexBuffer(
				+				vertSize, mVertexData->vertexCount, GBU_DYNAMIC);
				+
				+			mVertexData->setBuffer(i, vertexBuffer);
				+
				+			// Copy all data to the new buffer
				+			UINT8* oldBuffer = mCPUVertexData[i];
				+			UINT8* buffer = (UINT8*)cm_alloc(vertSize * numVertices);
				+
				+			// Running write position (in vertices) within the new buffer
				+			UINT32 destOffset = 0;
				+			if(oldBuffer != nullptr)
				+			{
				+				// mVertChunks still describes the OLD layout here; it is only rebuilt after this loop
				+				for(auto& allocData : mMeshAllocData)
				+				{
				+					ChunkData& oldChunk = mVertChunks[allocData.second.vertChunkIdx];
				+
				+					UINT8* oldData = oldBuffer + oldChunk.start * vertSize;
				+					memcpy(buffer + destOffset * vertSize, oldData, oldChunk.size * vertSize);
				+
				+					destOffset += oldChunk.size;
				+				}
				+
				+				cm_free(oldBuffer);
				+			}
				+
				+			// Upload only the compacted, in-use part (zero bytes on first creation)
				+			vertexBuffer->writeData(0, destOffset * vertSize, buffer, false);
				+
				+			mCPUVertexData[i] = buffer;
				+		}
				+
				+		// Reorder chunks
				+		// Must walk mMeshAllocData in the same order as the copy loop above so that the new
				+		// chunk offsets match where the data was actually placed.
				+		UINT32 destOffset = 0;
				+		Vector<ChunkData>::type newVertChunks;
				+		List<UINT32>::type freeVertChunks;
				+
				+		for(auto& allocData : mMeshAllocData)
				+		{
				+			ChunkData& oldChunk = mVertChunks[allocData.second.vertChunkIdx];
				+
				+			ChunkData newChunk;
				+			newChunk.start = destOffset;
				+			newChunk.size = oldChunk.size;
				+
				+			// Re-point the allocation at its entry in the new chunk table
				+			allocData.second.vertChunkIdx = (UINT32)newVertChunks.size();
				+			newVertChunks.push_back(newChunk);
				+
				+			destOffset += oldChunk.size;
				+		}
				+
				+		// Add free chunk
				+		// Everything past the compacted data becomes one free chunk
				+		if(destOffset != mNumVertices)
				+		{
				+			ChunkData newChunk;
				+			newChunk.start = destOffset;
				+			newChunk.size = mNumVertices - destOffset;
				+
				+			newVertChunks.push_back(newChunk);
				+			freeVertChunks.push_back((UINT32)(newVertChunks.size() - 1));
				+		}
				+
				+		mVertChunks = newVertChunks;
				+		mFreeVertChunks = freeVertChunks;
				+
				+		// Recyclable slot indices referred to the old chunk table and are no longer valid
				+		while(!mEmptyVertChunks.empty())
				+			mEmptyVertChunks.pop();
				+	}

			
 
				+

			
 
				+	/**
				+	 * Replaces the index buffer with a new one holding numIndices indices and compacts all
				+	 * live allocations to the front.
				+	 *
				+	 * A new GBU_DYNAMIC hardware buffer and a new CPU shadow buffer are created. Index chunks
				+	 * of the allocations in mMeshAllocData are copied back to back (in map iteration order)
				+	 * into the new CPU buffer, which is then written to the hardware buffer. Afterwards the
				+	 * chunk table is rebuilt to match the compacted layout, with a single free chunk covering
				+	 * the remaining space.
				+	 *
				+	 * Also used for the initial creation, in which case there is no old data to copy.
				+	 *
				+	 * @param numIndices	New total capacity, in indices.
				+	 */
				+	void MeshHeap::growIndexBuffer(UINT32 numIndices)
				+	{
				+		mNumIndices = numIndices;
				+
				+		mIndexData = std::shared_ptr<IndexData>(cm_new<IndexData, PoolAlloc>());
				+		mIndexData->indexCount = mNumIndices;
				+		mIndexData->indexBuffer = HardwareBufferManager::instance().createIndexBuffer(
				+			mIndexType, mIndexData->indexCount, GBU_DYNAMIC);
				+
				+		// Copy all data to the new buffer
				+		UINT32 idxSize = mIndexData->indexBuffer->getIndexSize();
				+
				+		UINT8* oldBuffer = mCPUIndexData;
				+		UINT8* buffer = (UINT8*)cm_alloc(idxSize * numIndices);
				+
				+		// Running write position (in indices) within the new buffer
				+		UINT32 destOffset = 0;
				+		if(oldBuffer != nullptr)
				+		{
				+			// mIdxChunks still describes the OLD layout here; it is only rebuilt further below
				+			for(auto& allocData : mMeshAllocData)
				+			{
				+				ChunkData& oldChunk = mIdxChunks[allocData.second.idxChunkIdx];
				+
				+				UINT8* oldData = oldBuffer + oldChunk.start * idxSize;
				+				memcpy(buffer + destOffset * idxSize, oldData, oldChunk.size * idxSize);
				+
				+				destOffset += oldChunk.size;
				+			}
				+
				+			cm_free(oldBuffer);
				+		}
				+
				+		// Upload only the compacted, in-use part (zero bytes on first creation)
				+		mIndexData->indexBuffer->writeData(0, destOffset * idxSize, buffer, false);
				+
				+		mCPUIndexData = buffer;
				+
				+		// Reorder chunks
				+		// Must walk mMeshAllocData in the same order as the copy loop above so that the new
				+		// chunk offsets match where the data was actually placed.
				+		destOffset = 0;
				+		Vector<ChunkData>::type newIdxChunks;
				+		List<UINT32>::type freeIdxChunks;
				+
				+		for(auto& allocData : mMeshAllocData)
				+		{
				+			ChunkData& oldChunk = mIdxChunks[allocData.second.idxChunkIdx];
				+
				+			ChunkData newChunk;
				+			newChunk.start = destOffset;
				+			newChunk.size = oldChunk.size;
				+
				+			// Re-point the allocation at its entry in the new chunk table
				+			allocData.second.idxChunkIdx = (UINT32)newIdxChunks.size();
				+			newIdxChunks.push_back(newChunk);
				+
				+			destOffset += oldChunk.size;
				+		}
				+
				+		// Add free chunk
				+		// Everything past the compacted data becomes one free chunk
				+		if(destOffset != mNumIndices)
				+		{
				+			ChunkData newChunk;
				+			newChunk.start = destOffset;
				+			newChunk.size = mNumIndices - destOffset;
				+
				+			newIdxChunks.push_back(newChunk);
				+			freeIdxChunks.push_back((UINT32)(newIdxChunks.size() - 1));
				+		}
				+
				+		mIdxChunks = newIdxChunks;
				+		mFreeIdxChunks = freeIdxChunks;
				+
				+		// Recyclable slot indices referred to the old chunk table and are no longer valid
				+		while(!mEmptyIdxChunks.empty())
				+			mEmptyIdxChunks.pop();
				+	}

			
 
				+

			
 
				+	/**
				+	 * Hands out an event query slot: reuses a previously freed slot when one is available,
				+	 * otherwise creates a new EventQuery and appends it.
				+	 *
				+	 * @return	Index of the slot in mEventQueries.
				+	 */
				+	UINT32 MeshHeap::createEventQuery()
				+	{
				+		if(!mFreeEventQueries.empty())
				+		{
				+			const UINT32 recycledIdx = mFreeEventQueries.top();
				+			mFreeEventQueries.pop();
				+
				+			return recycledIdx;
				+		}
				+
				+		QueryData freshQuery;
				+		freshQuery.query = EventQuery::create();
				+		freshQuery.queryId = 0;
				+		mEventQueries.push_back(freshQuery);
				+
				+		return (UINT32)(mEventQueries.size() - 1);
				+	}

			
 
				+

			
 
				+	/**
				+	 * Returns an event query slot to the pool so createEventQuery() can reuse it, and
				+	 * resets its query id to 0.
				+	 */
				+	void MeshHeap::freeEventQuery(UINT32 idx)
				+	{
				+		QueryData& released = mEventQueries[idx];
				+		released.queryId = 0;
				+
				+		mFreeEventQueries.push(idx);
				+	}

			
 
				+

			
 
				+	/**
				+	 * Returns the vertex data (declaration and per-stream buffers) shared by every mesh in
				+	 * the heap. The object is replaced whenever growVertexBuffer() runs.
				+	 */
				+	std::shared_ptr<VertexData> MeshHeap::getVertexData() const
				+	{
				+		return mVertexData;
				+	}

			
 
				+

			
 
				+	/**
				+	 * Returns the index data (index buffer and count) shared by every mesh in the heap.
				+	 * The object is replaced whenever growIndexBuffer() runs.
				+	 */
				+	std::shared_ptr<IndexData> MeshHeap::getIndexData() const
				+	{
				+		return mIndexData;
				+	}

			
 
				+

			
 
				+	/**
				+	 * Returns the position (in vertices) at which the given mesh's vertex chunk starts
				+	 * within the shared vertex buffers. The mesh must currently have an allocation.
				+	 */
				+	UINT32 MeshHeap::getVertexOffset(UINT32 meshId) const
				+	{
				+		auto entry = mMeshAllocData.find(meshId);
				+		assert(entry != mMeshAllocData.end());
				+
				+		return mVertChunks[entry->second.vertChunkIdx].start;
				+	}

			
 
				+

			
 
				+	/**
				+	 * Returns the position (in indices) at which the given mesh's index chunk starts
				+	 * within the shared index buffer. The mesh must currently have an allocation.
				+	 */
				+	UINT32 MeshHeap::getIndexOffset(UINT32 meshId) const
				+	{
				+		auto entry = mMeshAllocData.find(meshId);
				+		assert(entry != mMeshAllocData.end());
				+
				+		return mIdxChunks[entry->second.idxChunkIdx].start;
				+	}

			
 
				+

			
 
				+	/**
				+	 * Marks the mesh as in use by the GPU and starts an event query whose trigger is routed
				+	 * to queryTriggered().
				+	 *
				+	 * Each call stamps the mesh's query slot with a fresh query id; queryTriggered() compares
				+	 * ids so that only the most recent query changes the mesh's state.
				+	 *
				+	 * @param meshId	Id of a mesh that currently has an allocation.
				+	 */
				+	void MeshHeap::notifyUsedOnGPU(UINT32 meshId)
				+	{
				+		auto findIter = mMeshAllocData.find(meshId);
				+		assert(findIter != mMeshAllocData.end());
				+
				+		AllocatedData& allocData = findIter->second;
				+		assert(allocData.useFlags != UseFlags::Free);
				+
				+		// A CPUFree mesh (already deallocated by the user) keeps its state
				+		if(allocData.useFlags == UseFlags::GPUFree)
				+			allocData.useFlags = UseFlags::Used;
				+
				+		QueryData& queryData = mEventQueries[allocData.eventQueryIdx];
				+		queryData.queryId = mNextQueryId++;
				+		// NOTE(review): a new callback is connected on every call and none is disconnected here,
				+		// so callbacks from earlier calls presumably stay attached to the query - confirm.
				+		queryData.query->onTriggered.connect(boost::bind(&MeshHeap::queryTriggered, this, meshId, queryData.queryId));
				+		queryData.query->begin();
				+	}

			
 
				+

			
 
				+	/**
				+	 * Callback for the event queries started by notifyUsedOnGPU(); signals that the GPU has
				+	 * reached the point where the query was issued.
				+	 *
				+	 * If the query is the most recent one for the mesh, the mesh either becomes GPUFree or,
				+	 * when the user already deallocated it (CPUFree), has its chunks returned to the free lists.
				+	 *
				+	 * @param meshId	Mesh the query was issued for.
				+	 * @param queryId	Id the query was stamped with when it was issued.
				+	 */
				+	void MeshHeap::queryTriggered(UINT32 meshId, UINT32 queryId)
				+	{
				+		// Callbacks stay connected to a query after the slot is recycled, so one can arrive
				+		// for a mesh whose allocation was already released. That is not an error; ignore it.
				+		auto findIter = mMeshAllocData.find(meshId);
				+		if(findIter == mMeshAllocData.end())
				+			return;
				+
				+		AllocatedData& allocData = findIter->second;
				+
				+		// If query ids don't match then it means there is either a more recent query or
				+		// the buffer was discarded and we are not interested in the query result
				+		QueryData& queryData = mEventQueries[allocData.eventQueryIdx];
				+		if(queryId == queryData.queryId)
				+		{
				+			assert(allocData.useFlags != UseFlags::Free && allocData.useFlags != UseFlags::GPUFree);
				+
				+			if(allocData.useFlags == UseFlags::CPUFree)
				+			{
				+				allocData.useFlags = UseFlags::Free;
				+				freeEventQuery(allocData.eventQueryIdx);
				+
				+				mFreeVertChunks.push_back(allocData.vertChunkIdx);
				+				mFreeIdxChunks.push_back(allocData.idxChunkIdx);
				+
				+				mergeWithNearbyChunks(allocData.vertChunkIdx, allocData.idxChunkIdx);
				+
				+				mMeshAllocData.erase(findIter);
				+			}
				+			else
				+				allocData.useFlags = UseFlags::GPUFree;
				+		}
				+	}

			
 
				+

			
 
				+	/**
				+	 * Coalesces a just-freed vertex chunk and index chunk with any free chunks that directly
				+	 * precede or follow them.
				+	 *
				+	 * Absorbed chunks cannot be removed from the chunk vectors (allocations refer to chunks
				+	 * by index), so they are zeroed and their index is pushed onto the "empty" stack for
				+	 * allocInternal() to recycle. They remain listed in the free lists.
				+	 *
				+	 * @param chunkVertIdx	Index into mVertChunks of the freed vertex chunk.
				+	 * @param chunkIdxIdx	Index into mIdxChunks of the freed index chunk.
				+	 */
				+	void MeshHeap::mergeWithNearbyChunks(UINT32 chunkVertIdx, UINT32 chunkIdxIdx)
				+	{
				+		// Merge vertex chunks
				+		ChunkData& vertChunk = mVertChunks[chunkVertIdx];
				+		for(auto& freeChunkIdx : mFreeVertChunks)
				+		{
				+			if(chunkVertIdx == freeChunkIdx)
				+				continue;
				+
				+			ChunkData& curChunk = mVertChunks[freeChunkIdx];
				+
				+			// Skip slots that were already emptied by an earlier merge. They sit at (0, 0) and
				+			// would otherwise "match" any chunk starting at 0, putting the same slot on the
				+			// empty stack a second time.
				+			if(curChunk.size == 0)
				+				continue;
				+
				+			bool merged = false;
				+
				+			if(curChunk.start == (vertChunk.start + vertChunk.size))
				+			{
				+				// Free chunk directly follows ours
				+				vertChunk.size += curChunk.size;
				+
				+				merged = true;
				+			}
				+			else if((curChunk.start + curChunk.size) == vertChunk.start)
				+			{
				+				// Free chunk directly precedes ours
				+				vertChunk.start = curChunk.start;
				+				vertChunk.size += curChunk.size;
				+
				+				merged = true;
				+			}
				+
				+			if(merged)
				+			{
				+				// We can't remove the chunk since that would break the indexing scheme, so
				+				// mark it as empty and set size to 0. It will be reused when needed.
				+				curChunk.start = 0;
				+				curChunk.size = 0;
				+				mEmptyVertChunks.push(freeChunkIdx);
				+			}
				+		}
				+
				+		// Merge index chunks
				+		ChunkData& idxChunk = mIdxChunks[chunkIdxIdx];
				+		for(auto& freeChunkIdx : mFreeIdxChunks)
				+		{
				+			if(chunkIdxIdx == freeChunkIdx)
				+				continue;
				+
				+			ChunkData& curChunk = mIdxChunks[freeChunkIdx];
				+
				+			// Skip slots that were already emptied by an earlier merge (see above)
				+			if(curChunk.size == 0)
				+				continue;
				+
				+			bool merged = false;
				+
				+			if(curChunk.start == (idxChunk.start + idxChunk.size))
				+			{
				+				// Free chunk directly follows ours
				+				idxChunk.size += curChunk.size;
				+
				+				merged = true;
				+			}
				+			else if((curChunk.start + curChunk.size) == idxChunk.start)
				+			{
				+				// Free chunk directly precedes ours
				+				idxChunk.start = curChunk.start;
				+				idxChunk.size += curChunk.size;
				+
				+				merged = true;
				+			}
				+
				+			if(merged)
				+			{
				+				// We can't remove the chunk since that would break the indexing scheme, so
				+				// mark it as empty and set size to 0. It will be reused when needed.
				+				curChunk.start = 0;
				+				curChunk.size = 0;
				+				mEmptyIdxChunks.push(freeChunkIdx);
				+			}
				+		}
				+	}

			
 
				 }
			
--- a/CamelotCore/Source/CmRenderSystem.cpp
+++ b/CamelotCore/Source/CmRenderSystem.cpp
@@ -260,10 +260,12 @@ namespace CamelotFramework {
 
				 				indexCount = indexData->indexCount;

			
 
				 

			
 
				 			setIndexBuffer(indexData->indexBuffer);

			
 
				-			drawIndexed(indexOffset, indexCount, vertexData->vertexCount);

			
 
				+			drawIndexed(indexOffset + mesh->getIndexOffset(), indexCount, mesh->getVertexOffset(), vertexData->vertexCount);

			
 
				 		}

			
 
				 		else

			
 
				-			draw(vertexData->vertexCount);

			
 
				+			draw(mesh->getVertexOffset(), vertexData->vertexCount);

			
 
				+

			
 
				+		mesh->notifyUsedOnGPU();

			
 
				 

			
 
				 		gProfiler().endSample("render");

			
 
				 	}

			
--- a/CamelotCore/Source/CmTransientMesh.cpp
+++ b/CamelotCore/Source/CmTransientMesh.cpp
@@ -1,6 +1,7 @@
 
				 #include "CmTransientMesh.h"

			
 
				 #include "CmVertexData.h"

			
 
				 #include "CmIndexData.h"

			
 
				+#include "CmMeshHeap.h"

			
 
				 

			
 
				 namespace CamelotFramework

			
 
				 {

			
@@ -27,30 +28,26 @@ namespace CamelotFramework
 
				 

			
 
				 	std::shared_ptr<VertexData> TransientMesh::getVertexData() const

			
 
				 	{

			
 
				-		// TODO - Get vertex data from parent MeshHeap

			
 
				-		return nullptr;

			
 
				+		return mParentHeap->getVertexData();

			
 
				 	}

			
 
				 

			
 
				 	std::shared_ptr<IndexData> TransientMesh::getIndexData() const

			
 
				 	{

			
 
				-		// TODO - Get index data from parent MeshHeap

			
 
				-		return nullptr;

			
 
				+		return mParentHeap->getIndexData();

			
 
				 	}

			
 
				 

			
 
				 	UINT32 TransientMesh::getVertexOffset() const

			
 
				 	{

			
 
				-		// TODO - Get vertex offset from parent MeshHeap

			
 
				-		return 0;

			
 
				+		return mParentHeap->getVertexOffset(mId);

			
 
				 	}

			
 
				 

			
 
				 	UINT32 TransientMesh::getIndexOffset() const

			
 
				 	{

			
 
				-		// TODO - Get index offset from parent MeshHeap

			
 
				-		return 0;

			
 
				+		return mParentHeap->getIndexOffset(mId);

			
 
				 	}

			
 
				 

			
 
				 	void TransientMesh::notifyUsedOnGPU()

			
 
				 	{

			
 
				-		// TODO - Refresh EventQuery

			
 
				+		mParentHeap->notifyUsedOnGPU(mId);

			
 
				 	}

			
 
				 }
			
--- a/CamelotD3D11RenderSystem/Include/CmD3D11RenderSystem.h
+++ b/CamelotD3D11RenderSystem/Include/CmD3D11RenderSystem.h
@@ -51,10 +51,10 @@ namespace CamelotFramework
 
				 		void setDrawOperation(DrawOperationType op);

			
 
				 

			
 
				 		/** @copydoc RenderSystem::draw() */

			
 
				-		void draw(UINT32 vertexCount);

			
 
				+		void draw(UINT32 vertexOffset, UINT32 vertexCount);

			
 
				 

			
 
				 		/** @copydoc RenderSystem::drawIndexed() */

			
 
				-		void drawIndexed(UINT32 startIndex, UINT32 indexCount, UINT32 vertexCount);

			
 
				+		void drawIndexed(UINT32 startIndex, UINT32 indexCount, UINT32 vertexOffset, UINT32 vertexCount);

			
 
				 

			
 
				 		/** @copydoc RenderSystem::bindGpuProgram() */

			
 
				 		void bindGpuProgram(HGpuProgram prg);

			
--- a/CamelotD3D11RenderSystem/Source/CmD3D11RenderSystem.cpp
+++ b/CamelotD3D11RenderSystem/Source/CmD3D11RenderSystem.cpp
@@ -545,13 +545,13 @@ namespace CamelotFramework
 
				 			CM_EXCEPT(RenderingAPIException, "Failed to bindGpuParams : " + mDevice->getErrorDescription());

			
 
				 	}

			
 
				 

			
 
				-	void D3D11RenderSystem::draw(UINT32 vertexCount)

			
 
				+	void D3D11RenderSystem::draw(UINT32 vertexOffset, UINT32 vertexCount)

			
 
				 	{

			
 
				 		THROW_IF_NOT_CORE_THREAD;

			
 
				 

			
 
				 		applyInputLayout();

			
 
				 

			
 
				-		mDevice->getImmediateContext()->Draw(vertexCount, 0);

			
 
				+		mDevice->getImmediateContext()->Draw(vertexCount, vertexOffset);

			
 
				 

			
 
				 #if CM_DEBUG_MODE

			
 
				 		if(mDevice->hasError())

			
@@ -559,13 +559,13 @@ namespace CamelotFramework
 
				 #endif

			
 
				 	}

			
 
				 

			
 
				-	void D3D11RenderSystem::drawIndexed(UINT32 startIndex, UINT32 indexCount, UINT32 vertexCount)

			
 
				+	void D3D11RenderSystem::drawIndexed(UINT32 startIndex, UINT32 indexCount, UINT32 vertexOffset, UINT32 vertexCount)

			
 
				 	{

			
 
				 		THROW_IF_NOT_CORE_THREAD;

			
 
				 

			
 
				 		applyInputLayout();

			
 
				 

			
 
				-		mDevice->getImmediateContext()->DrawIndexed(indexCount, startIndex, 0);

			
 
				+		mDevice->getImmediateContext()->DrawIndexed(indexCount, startIndex, vertexOffset);

			
 
				 

			
 
				 #if CM_DEBUG_MODE

			
 
				 		if(mDevice->hasError())

			
--- a/CamelotD3D9Renderer/Include/CmD3D9RenderSystem.h
+++ b/CamelotD3D9Renderer/Include/CmD3D9RenderSystem.h
@@ -141,12 +141,12 @@ namespace CamelotFramework
 
				 		/**

			
 
				 		 * @copydoc RenderSystem::draw()

			
 
				 		 */

			
 
				-		void draw(UINT32 vertexCount);

			
 
				+		void draw(UINT32 vertexOffset, UINT32 vertexCount);

			
 
				 

			
 
				 		/**

			
 
				 		 * @copydoc RenderSystem::drawIndexed()

			
 
				 		 */

			
 
				-		void drawIndexed(UINT32 startIndex, UINT32 indexCount, UINT32 vertexCount);

			
 
				+		void drawIndexed(UINT32 startIndex, UINT32 indexCount, UINT32 vertexOffset, UINT32 vertexCount);

			
 
				 

			
 
				         void setScissorRect(UINT32 left, UINT32 top, UINT32 right, UINT32 bottom);

			
 
				 

			
--- a/CamelotD3D9Renderer/Source/CmD3D9RenderSystem.cpp
+++ b/CamelotD3D9Renderer/Source/CmD3D9RenderSystem.cpp
@@ -1270,11 +1270,11 @@ namespace CamelotFramework
 
				 		mCurrentDrawOperation = op;

			
 
				 	}

			
 
				 	//---------------------------------------------------------------------

			
 
				-	void D3D9RenderSystem::draw(UINT32 vertexCount)

			
 
				+	void D3D9RenderSystem::draw(UINT32 vertexOffset, UINT32 vertexCount)

			
 
				 	{

			
 
				 		UINT32 primCount = pointCountToPrimCount(mCurrentDrawOperation, vertexCount);

			
 
				 

			
 
				-		HRESULT hr = getActiveD3D9Device()->DrawPrimitive(getD3D9PrimitiveType(), 0, static_cast<UINT>(primCount)); 

			
 
				+		HRESULT hr = getActiveD3D9Device()->DrawPrimitive(getD3D9PrimitiveType(), static_cast<UINT>(vertexOffset), static_cast<UINT>(primCount)); 

			
 
				 

			
 
				 		if( FAILED( hr ) )

			
 
				 		{

			
@@ -1283,14 +1283,14 @@ namespace CamelotFramework
 
				 		}

			
 
				 	}

			
 
				 	//---------------------------------------------------------------------

			
 
				-	void D3D9RenderSystem::drawIndexed(UINT32 startIndex, UINT32 indexCount, UINT32 vertexCount)

			
 
				+	void D3D9RenderSystem::drawIndexed(UINT32 startIndex, UINT32 indexCount, UINT32 vertexOffset, UINT32 vertexCount)

			
 
				 	{

			
 
				 		UINT32 primCount = pointCountToPrimCount(mCurrentDrawOperation, indexCount);

			
 
				 

			
 
				 		// do indexed draw operation

			
 
				 		HRESULT hr = getActiveD3D9Device()->DrawIndexedPrimitive(

			
 
				 			getD3D9PrimitiveType(), 

			
 
				-			0, 

			
 
				+			static_cast<UINT>(vertexOffset), 

			
 
				 			0, 

			
 
				 			static_cast<UINT>(vertexCount), 

			
 
				 			static_cast<UINT>(startIndex), 

			
--- a/CamelotGLRenderer/Include/CmGLRenderSystem.h
+++ b/CamelotGLRenderer/Include/CmGLRenderSystem.h
@@ -144,12 +144,12 @@ namespace CamelotFramework {
 
				 		/**

			
 
				 		 * @copydoc RenderSystem::draw()

			
 
				 		 */

			
 
				-		void draw(UINT32 vertexCount);

			
 
				+		void draw(UINT32 vertexOffset, UINT32 vertexCount);

			
 
				 

			
 
				 		/**

			
 
				 		 * @copydoc RenderSystem::drawIndexed()

			
 
				 		 */

			
 
				-		void drawIndexed(UINT32 startIndex, UINT32 indexCount, UINT32 vertexCount);

			
 
				+		void drawIndexed(UINT32 startIndex, UINT32 indexCount, UINT32 vertexOffset, UINT32 vertexCount);

			
 
				 

			
 
				 		/**

			
 
				 		 * @copydoc RenderSystem::clearRenderTarget()

			
--- a/CamelotGLRenderer/Source/CmGLRenderSystem.cpp
+++ b/CamelotGLRenderer/Source/CmGLRenderSystem.cpp
@@ -688,18 +688,18 @@ namespace CamelotFramework
 
				 		mBoundIndexBuffer = buffer;

			
 
				 	}

			
 
				 	//---------------------------------------------------------------------

			
 
				-	void GLRenderSystem::draw(UINT32 vertexCount)

			
 
				+	void GLRenderSystem::draw(UINT32 vertexOffset, UINT32 vertexCount)

			
 
				 	{

			
 
				 		// Find the correct type to render

			
 
				 		GLint primType = getGLDrawMode();

			
 
				 		beginDraw();

			
 
				 

			
 
				-		glDrawArrays(primType, 0, vertexCount);

			
 
				+		glDrawArrays(primType, vertexOffset, vertexCount);

			
 
				 

			
 
				 		endDraw();

			
 
				 	}

			
 
				 	//---------------------------------------------------------------------

			
 
				-	void GLRenderSystem::drawIndexed(UINT32 startIndex, UINT32 indexCount, UINT32 vertexCount)

			
 
				+	void GLRenderSystem::drawIndexed(UINT32 startIndex, UINT32 indexCount, UINT32 vertexOffset, UINT32 vertexCount)

			
 
				 	{

			
 
				 		if(mBoundIndexBuffer == nullptr)

			
 
				 		{

			
@@ -716,7 +716,7 @@ namespace CamelotFramework
 
				 

			
 
				 		GLenum indexType = (mBoundIndexBuffer->getType() == IndexBuffer::IT_16BIT) ? GL_UNSIGNED_SHORT : GL_UNSIGNED_INT;

			
 
				 

			
 
				-		glDrawElements(primType, indexCount, indexType, 0);

			
 
				+		glDrawElementsBaseVertex(primType, indexCount, indexType, (GLvoid*)(mBoundIndexBuffer->getIndexSize() * startIndex), vertexOffset);

			
 
				 

			
 
				 		endDraw();

			
 
				 	}

			
--- a/Opts.txt
+++ b/Opts.txt
@@ -18,16 +18,6 @@ When optimizing UpdateLayout make sure to mark elements that are fully culled as
 
				  - But in order to determine that I first need to update the sprite to find out the elements bounds which defeats the point

			
 
				  - TODO - FIgure this out

			
 
				 

			
 
				- -------------

			
 
				-

			
 
				-TransientMesh

			
 
				- - Only used for writing, only dynamic, and only MAP_NO_OVERWRITE writing

			
 
				- - Accepts starting buffer sizes, and will enlarge them as needed

			
 
				-  - When buffer is enlarged send a warning so user knows to use a bigger buffer next time

			
 
				- - Keeps track of parts of the buffer used by GPU using GPU queries

			
 
				-   - Need to implement a proper GPU query interface

			
 
				- - Keeps track of fragmentation and has an option to defragment, manually or auto after certain %

			
 
				-

			
 
				 ----------

			
 
				 

			
 
				 BIG TODO FINALLY: Reorganize GUI so it all uses one big vertex buffer (probably in the form of a TransientMesh). This means I need better support for drawing individual objects

			
@@ -36,41 +26,5 @@ from a transient mesh by drawing only parts of its buffer. But only do this afte
 
				 

			
 
				 ----------

			
 
				 

			
 
				-Transient mesh brainstorming:

			
 
				-How to release memory from transient mesh?

			
 
				- - Make it use non-CoreAccessor interface?

			
 
				-

			
 
				-

			
 
				-TransientMesh

			
 
				- Upon construction we specify vertex and index format, including initial vertex/index buffer sizes

			
 
				-   MeshChunk allocate(UINT32 numVertices, UINT32 numIndices)

			
 
				-    - Thread safe method (custom mutex), returns a MeshChunk which contains an unique index

			
 
				-    - Each allocation represents its own SubMesh - there can't be multiple sub-meshes per allocation

			
 
				-   deallocate(MeshChunk chunk)

			
 
				-    - I could make MeshChunk as GpuResource, so I can write to it directly

			
 
				-   How do I set materials per sub-mesh?

			
 
				-    - I keep a mapping MeshChunk->Material in GUIManager 

			
 
				-   I will need to update Render method so it can accept MeshChunk?

			
 
				-    - Or should TransientMesh derive from Mesh?

			
 
				-   When calling Render I need to provide index offset/length, however since I will be

			
 
				-   supporting defragmenting that can change internally at any time. It would be ideal to 

			
 
				-   retrieve that data when on core thread.

			
 
				-    - Maybe even rethink Mesh submeshes so that they have a unified interface?

			
 
				-	- BUT: I can handle defragment on the CPU. I just need to find offsets and sizes, and actual

			
 
				-	  memory copies can be done on the core thread later.

			
 
				-

			
 
				-Name it MeshHeap instead of TransientMesh

			
 
				- - It can derive from a common class MeshBase (which Mesh also derives from)

			
 
				- - Render can then accept MeshBase instead of Mesh

			
 
				-

			
 
				-

			
 
				------------

			
 
				-

			
 
				-POTENTIALLY

			
 
				-

			
 
				-I could have normal Meshes use one big vertex/index buffer in the background.

			
 
				- - However that means Dynamic/Static tags don't mean anything

			
 
				- - writeSubresource discard is ignored

			
 
				- - It's not as clear to the user

			
 
				- - Need to implement reading as well

			
 
				- - Not sure if it would work well with a bunch of smaller allocations (Although I think we would avoid those in any case)
			
 
				+When writing to buffer in MeshHeap (in two places at least) I need to add NO_OVERWRITE flag to HardwareBuffer

			
 
				+When doing allocInternal I don't check that index/vertex desc in MeshData actually matches the ones in MeshHeap.
			
--- a/TODO.txt
+++ b/TODO.txt
@@ -129,7 +129,7 @@ Low priority TODO
 
				  - onMovedOrResized is still used by Viewport while that same callback is offered by RenderWindowManager. There is no need to have them in both places.

			
 
				  - Texture "ScaleToFit" will cause the texture to repeat instead of clipping the image. e.g. a 50x20 texture placed on an 50x100 area will repeat 5x

			
 
				  - When writing to mesh vertex buffer in Mesh::writeSubresource that requires a color flip I need to create a temporary copy of the 

			
 
				-    entire buffer. It would be better to handle this differently. 

			
 
				+    entire buffer. It would be better to handle this differently. The same thing happens in MeshHeap.

			
 
				 ----------------------------------------------------------------------------------------------

			
 
				 Optional:

			
 
				  - Need better handling for shader techniques. Some Materials are able to run on all renderers yet I can only specify one. This is problematic