Browse Source

Major refactor of TextUtility to reduce memory allocations

Marko Pintera 12 years ago
parent
commit
cd3069a13a

+ 1 - 0
BansheeEngine.sln

@@ -44,6 +44,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
 		EditorWindowDock.txt = EditorWindowDock.txt
 		Notes.txt = Notes.txt
 		RenderOperation.txt = RenderOperation.txt
+		TextOpts.txt = TextOpts.txt
 		TODO.txt = TODO.txt
 		TODODoc.txt = TODODoc.txt
 		TODOEditor.txt = TODOEditor.txt

+ 1 - 1
BansheeEngine/Include/BsTextSprite.h

@@ -42,7 +42,7 @@ namespace BansheeEngine
 
 		void update(const TEXT_SPRITE_DESC& desc);
 
-		static CM::Vector<CM::Int2>::type getAlignmentOffsets(const CM::Vector<CM::TextUtility::TextLine>::type& lines, 
+		static CM::Vector<CM::Int2>::type getAlignmentOffsets(const CM::TextUtility::TextData& textData, 
 			CM::UINT32 width, CM::UINT32 height, TextHorzAlign horzAlign, TextVertAlign vertAlign);
 
 		/**

+ 9 - 7
BansheeEngine/Source/BsGUIInputTool.cpp

@@ -28,12 +28,12 @@ namespace BansheeEngine
 		if(textData == nullptr)
 			return;
 
-		const CM::Vector<TextUtility::TextLine>::type& lines = textData->getLines();
-		const CM::Vector<UINT32>::type& quadsPerPage = textData->getNumQuadsPerPage();
+		UINT32 numLines = textData->getNumLines();
+		UINT32 numPages = textData->getNumPages();
 
 		mNumQuads = 0;
-		for(auto& numQuads : quadsPerPage)
-			mNumQuads += numQuads;
+		for(UINT32 i = 0; i < numPages; i++)
+			mNumQuads += textData->getNumQuadsForPage(i);
 
 		if(mQuads != nullptr)
 			cm_delete<ScratchAlloc>(mQuads);
@@ -48,13 +48,15 @@ namespace BansheeEngine
 		// Store cached line data
 		UINT32 curCharIdx = 0;
 		UINT32 curLineIdx = 0;
-		Vector<Int2>::type alignmentOffsets = TextSprite::getAlignmentOffsets(lines, mTextDesc.width, 
+		Vector<Int2>::type alignmentOffsets = TextSprite::getAlignmentOffsets(*textData, mTextDesc.width, 
 			mTextDesc.height, mTextDesc.horzAlign, mTextDesc.vertAlign);
 
-		for(auto& line : lines)
+		for(UINT32 i = 0; i < numLines; i++)
 		{
+			const TextUtility::TextLine& line = textData->getLine(i);
+
 			// Line has a newline char only if it wasn't created by word wrap and it isn't the last line
-			bool hasNewline = line.hasNewlineChar() && (curLineIdx != ((UINT32)lines.size() - 1));
+			bool hasNewline = line.hasNewlineChar() && (curLineIdx != (numLines - 1));
 
 			UINT32 startChar = curCharIdx;
 			UINT32 endChar = curCharIdx + line.getNumChars() + (hasNewline ? 1 : 0);

+ 50 - 30
BansheeEngine/Source/BsTextSprite.cpp

@@ -4,6 +4,8 @@
 #include "CmFont.h"
 #include "CmVector2.h"
 
+#include "CmProfiler.h" // PROFILING ONLY
+
 using namespace CamelotFramework;
 
 namespace BansheeEngine
@@ -15,18 +17,22 @@ namespace BansheeEngine
 
 	void TextSprite::update(const TEXT_SPRITE_DESC& desc)
 	{
+		gProfiler().beginSample("textUpdateA");
 		std::shared_ptr<TextUtility::TextData> textData = TextUtility::getTextData(desc.text, desc.font, desc.fontSize, desc.width, desc.height, desc.wordWrap);
+		gProfiler().endSample("textUpdateA");
 
 		if(textData == nullptr)
 			return;
 
-		const CM::Vector<TextUtility::TextLine>::type& lines = textData->getLines();
-		const CM::Vector<UINT32>::type& quadsPerPage = textData->getNumQuadsPerPage();
+		gProfiler().beginSample("textUpdateB");
+
+		UINT32 numLines = textData->getNumLines();
+		UINT32 numPages = textData->getNumPages();
 
 		// Resize cached mesh array to needed size
-		if(mCachedRenderElements.size() > quadsPerPage.size())
+		if(mCachedRenderElements.size() > numPages)
 		{
-			for(UINT32 i = (UINT32)quadsPerPage.size(); i < (UINT32)mCachedRenderElements.size(); i++)
+			for(UINT32 i = numPages; i < (UINT32)mCachedRenderElements.size(); i++)
 			{
 				auto& renderElem = mCachedRenderElements[i];
 
@@ -49,15 +55,17 @@ namespace BansheeEngine
 			}
 		}
 
-		if(mCachedRenderElements.size() != quadsPerPage.size())
-			mCachedRenderElements.resize(quadsPerPage.size());
+		if(mCachedRenderElements.size() != numPages)
+			mCachedRenderElements.resize(numPages);
+
+		gProfiler().endSample("textUpdateB");
+		gProfiler().beginSample("textUpdateC");
 
 		// Actually generate a mesh
-		const CM::Vector<HTexture>::type& texturePages = textData->getTexturePages();
 		UINT32 texPage = 0;
 		for(auto& cachedElem : mCachedRenderElements)
 		{
-			UINT32 newNumQuads = quadsPerPage[texPage];
+			UINT32 newNumQuads = textData->getNumQuadsForPage(texPage);
 			if(newNumQuads != cachedElem.numQuads)
 			{
 				UINT32 oldVertexCount = cachedElem.numQuads * 4;
@@ -73,7 +81,7 @@ namespace BansheeEngine
 				cachedElem.numQuads = newNumQuads;
 			}
 
-			HMaterial newMaterial = GUIMaterialManager::instance().requestTextMaterial(texturePages[texPage]);
+			HMaterial newMaterial = GUIMaterialManager::instance().requestTextMaterial(textData->getTextureForPage(texPage));
 			if(cachedElem.material != nullptr)
 				GUIMaterialManager::instance().releaseMaterial(cachedElem.material);
 
@@ -82,9 +90,10 @@ namespace BansheeEngine
 			texPage++;
 		}
 
+		gProfiler().instance().endSample("textUpdateC");
+		gProfiler().instance().beginSample("textUpdateD");
+
 		// Calc alignment and anchor offsets and set final line positions
-		UINT32 numPages = (UINT32)quadsPerPage.size();
-		
 		for(UINT32 j = 0; j < numPages; j++)
 		{
 			SpriteRenderElement& renderElem = mCachedRenderElements[j];
@@ -93,24 +102,28 @@ namespace BansheeEngine
 				renderElem.vertices, renderElem.uvs, renderElem.indexes, renderElem.numQuads);
 		}
 
+		gProfiler().instance().endSample("textUpdateD");
+		gProfiler().instance().beginSample("textUpdateE");
+
 		updateBounds();
+
+		gProfiler().instance().endSample("textUpdateE");
 	}
 
 	UINT32 TextSprite::genTextQuads(UINT32 page, const TextUtility::TextData& textData, UINT32 width, UINT32 height, 
 		TextHorzAlign horzAlign, TextVertAlign vertAlign, SpriteAnchor anchor, Vector2* vertices, Vector2* uv, UINT32* indices, UINT32 bufferSizeQuads)
 	{
-		const CM::Vector<TextUtility::TextLine>::type& lines = textData.getLines();
-		const CM::Vector<UINT32>::type& quadsPerPage = textData.getNumQuadsPerPage();
-
-		UINT32 newNumQuads = quadsPerPage[page];
+		UINT32 numLines = textData.getNumLines();
+		UINT32 newNumQuads = textData.getNumQuadsForPage(page);
 
-		Vector<Int2>::type alignmentOffsets = getAlignmentOffsets(lines, width, height, horzAlign, vertAlign);
+		Vector<Int2>::type alignmentOffsets = getAlignmentOffsets(textData, width, height, horzAlign, vertAlign);
 		Int2 offset = getAnchorOffset(anchor, width, height);
 
 		UINT32 quadOffset = 0;
-		for(size_t i = 0; i < lines.size(); i++)
+		for(UINT32 i = 0; i < numLines; i++)
 		{
-			UINT32 writtenQuads = lines[i].fillBuffer(page, vertices, uv, indices, quadOffset, bufferSizeQuads);
+			const TextUtility::TextLine& line = textData.getLine(i);
+			UINT32 writtenQuads = line.fillBuffer(page, vertices, uv, indices, quadOffset, bufferSizeQuads);
 
 			Int2 position = offset + alignmentOffsets[i];
 			UINT32 numVertices = writtenQuads * 4;
@@ -130,19 +143,20 @@ namespace BansheeEngine
 	UINT32 TextSprite::genTextQuads(const TextUtility::TextData& textData, UINT32 width, UINT32 height, 
 		TextHorzAlign horzAlign, TextVertAlign vertAlign, SpriteAnchor anchor, Vector2* vertices, Vector2* uv, UINT32* indices, UINT32 bufferSizeQuads)
 	{
-		const CM::Vector<TextUtility::TextLine>::type& lines = textData.getLines();
-		const CM::Vector<UINT32>::type& quadsPerPage = textData.getNumQuadsPerPage();
+		UINT32 numLines = textData.getNumLines();
+		UINT32 numPages = textData.getNumPages();
 
-		Vector<Int2>::type alignmentOffsets = getAlignmentOffsets(lines, width, height, horzAlign, vertAlign);
+		Vector<Int2>::type alignmentOffsets = getAlignmentOffsets(textData, width, height, horzAlign, vertAlign);
 		Int2 offset = getAnchorOffset(anchor, width, height);
 
 		UINT32 quadOffset = 0;
-		UINT32 numPages = (UINT32)quadsPerPage.size();
-		for(size_t i = 0; i < lines.size(); i++)
+		
+		for(UINT32 i = 0; i < numLines; i++)
 		{
+			const TextUtility::TextLine& line = textData.getLine(i);
 			for(UINT32 j = 0; j < numPages; j++)
 			{
-				UINT32 writtenQuads = lines[i].fillBuffer(j, vertices, uv, indices, quadOffset, bufferSizeQuads);
+				UINT32 writtenQuads = line.fillBuffer(j, vertices, uv, indices, quadOffset, bufferSizeQuads);
 
 				Int2 position = offset + alignmentOffsets[i];
 
@@ -160,12 +174,16 @@ namespace BansheeEngine
 		return quadOffset;
 	}
 
-	Vector<Int2>::type TextSprite::getAlignmentOffsets(const Vector<TextUtility::TextLine>::type& lines, 
+	Vector<Int2>::type TextSprite::getAlignmentOffsets(const TextUtility::TextData& textData, 
 		UINT32 width, UINT32 height, TextHorzAlign horzAlign, TextVertAlign vertAlign)
 	{
+		UINT32 numLines = textData.getNumLines();
 		UINT32 curHeight = 0;
-		for(auto& line : lines)
+		for(UINT32 i = 0; i < numLines; i++)
+		{
+			const TextUtility::TextLine& line = textData.getLine(i);
 			curHeight += line.getYOffset();
+		}
 
 		// Calc vertical alignment offset
 		UINT32 vertDiff = std::max(0U, height - curHeight);
@@ -186,8 +204,10 @@ namespace BansheeEngine
 		// Calc horizontal alignment offset
 		UINT32 curY = 0;
 		Vector<Int2>::type lineOffsets;
-		for(size_t i = 0; i < lines.size(); i++)
+		for(UINT32 i = 0; i < numLines; i++)
 		{
+			const TextUtility::TextLine& line = textData.getLine(i);
+
 			UINT32 horzOffset = 0;
 			switch(horzAlign)
 			{
@@ -195,15 +215,15 @@ namespace BansheeEngine
 				horzOffset = 0;
 				break;
 			case THA_Right:
-				horzOffset = std::max(0, (INT32)(width - lines[i].getWidth()));
+				horzOffset = std::max(0, (INT32)(width - line.getWidth()));
 				break;
 			case THA_Center:
-				horzOffset = std::max(0, (INT32)(width - lines[i].getWidth())) / 2;
+				horzOffset = std::max(0, (INT32)(width - line.getWidth())) / 2;
 				break;
 			}
 
 			lineOffsets.push_back(Int2(horzOffset, vertOffset + curY));
-			curY += lines[i].getYOffset();
+			curY += line.getYOffset();
 		}
 
 		return lineOffsets;

+ 82 - 33
CamelotCore/Include/CmTextUtility.h

@@ -12,48 +12,48 @@ namespace CamelotFramework
 		class TextWord
 		{
 		public:
-			TextWord(bool spacer);
+			void init(bool spacer);
 
-			UINT32 addChar(const CHAR_DESC& desc);
+			UINT32 addChar(UINT32 charIdx, const CHAR_DESC& desc);
 			void addSpace(UINT32 spaceWidth);
 
 			UINT32 getWidth() const { return mWidth; }
 			UINT32 getHeight() const { return mHeight; }
 			bool isSpacer() const { return mSpacer; }
 
-			const Vector<CHAR_DESC>::type& getChars() const { return mChars; }
+			UINT32 getNumChars() const { return mLastChar == nullptr ? 0 : (mCharsEnd - mCharsStart + 1); }
+			UINT32 getCharsStart() const { return mCharsStart; }
+			UINT32 getCharsEnd() const { return mCharsEnd; }
 
 		private:
-			Vector<CHAR_DESC>::type mChars;
+			UINT32 mCharsStart, mCharsEnd;
 			UINT32 mWidth;
 			UINT32 mHeight;
+
+			const CHAR_DESC* mLastChar;
+
 			bool mSpacer;
 			UINT32 mSpaceWidth;
 		};
-
+		
+		struct PageInfo
+		{
+			UINT32 numQuads;
+			HTexture texture;
+		};
 	public:
+		class TextData;
+
 		class CM_EXPORT TextLine
 		{
 		public:
-			TextLine(UINT32 baselineOffset, UINT32 lineHeight, UINT32 spaceWidth);
-			~TextLine();
-
 			UINT32 getWidth() const { return mWidth; }
 			UINT32 getHeight() const { return mHeight; }
 
 			/**
 			 * @brief	Returns an offset used to separate two lines.
 			 */
-			UINT32 getYOffset() const { return mLineHeight; }
-
-			/**
-			 * @brief	Gets a number quads used by all characters for every page used by this text line.
-			 *
-			 * @note	One page generally corresponds to one bitmap from which the characters are read from.
-			 * 			
-			 *			One character is represented with a single quad. Some pages might be empty.
-			 */
-			Vector<UINT32>::type getNumQuadsPerPage() const;
+			UINT32 getYOffset() const { return mTextData->getLineHeight(); }
 
 			/**
 			 * @brief	Fills the vertex/uv/index buffers for the specified page, with all the character data
@@ -84,22 +84,24 @@ namespace CamelotFramework
 		private:
 			friend class TextUtility;
 
+			TextData* mTextData;
+			UINT32 mWordsStart, mWordsEnd;
+
 			UINT32 mWidth;
 			UINT32 mHeight;
-			UINT32 mBaselineOffset;
-			UINT32 mLineHeight;
-			UINT32 mSpaceWidth;
-			Vector<TextWord>::type mWords;
-			TextWord* mLastWord;
+
+			bool mIsEmpty;
 			bool mHasNewline;
 
-			void add(const CHAR_DESC& charDesc);
+			void add(UINT32 charIdx, const CHAR_DESC& charDesc);
 			void addSpace();
-			void addWord(const TextWord& word);
+			void addWord(UINT32 wordIdx, const TextWord& word);
 
+			void init(TextData* textData);
 			void finalize(bool hasNewlineChar);
 
-			TextWord removeLastWord();
+			bool isEmpty() const { return mIsEmpty; }
+			UINT32 removeLastWord();
 
 			void calculateBounds();
 		};
@@ -107,25 +109,72 @@ namespace CamelotFramework
 		class CM_EXPORT TextData
 		{
 		public:
+			TextData(const HFont& font, INT32 baselineOffset, UINT32 lineHeight, UINT32 spaceWidth);
 			~TextData();
 
-			const Vector<TextLine>::type& getLines() const { return mLines; }
-			const Vector<HTexture>::type& getTexturePages() const { return mTexturePages; }
-			const Vector<UINT32>::type& getNumQuadsPerPage() const  { return mQuadsPerPage; }
+			UINT32 getNumLines() const { return mNumLines; }
+			UINT32 getNumPages() const { return mNumPageInfos; }
+
+			const TextLine& getLine(UINT32 idx) const { return mLines[idx]; }
+			const HTexture& getTextureForPage(UINT32 page) const { return mPageInfos[page].texture; }
+			UINT32 getNumQuadsForPage(UINT32 page) const { return mPageInfos[page].numQuads; }
+
 			UINT32 getWidth() const;
 			UINT32 getHeight() const;
 
 		private:
 			friend class TextUtility;
+			friend class TextLine;
 
-			Vector<UINT32>::type mQuadsPerPage;
-			Vector<TextLine>::type mLines;
-			Vector<HTexture>::type mTexturePages;
+			INT32 getBaselineOffset() const { return mBaselineOffset; }
+			UINT32 getLineHeight() const { return mLineHeight; }
+			UINT32 getSpaceWidth() const { return mSpaceWidth; }
+
+			const CHAR_DESC& getChar(UINT32 idx) const { return *mChars[idx]; }
+			const TextWord& getWord(UINT32 idx) const { return mWords[idx]; }
+
+		private:
+			const CHAR_DESC** mChars;
+			UINT32 mNumChars;
+
+			TextWord* mWords;
+			UINT32 mNumWords;
+
+			TextLine* mLines;
+			UINT32 mNumLines;
+
+			PageInfo* mPageInfos;
+			UINT32 mNumPageInfos;
+
+			void* mData;
+
+			HFont mFont;
+			INT32 mBaselineOffset;
+			UINT32 mLineHeight;
+			UINT32 mSpaceWidth;
 		};
 
 		static std::shared_ptr<TextUtility::TextData> getTextData(const WString& text, const HFont& font, UINT32 fontSize, UINT32 width = 0, UINT32 height = 0, bool wordWrap = false);
 
 	private:
-		static void addCharToPage(TextData& data, UINT32 page, const FontData& fontData);
+		friend class TextLine;
+		friend class TextWord;
+
+		static Vector<TextWord>::type WordBuffer;
+		static UINT32 NextFreeWord;
+
+		static Vector<TextLine>::type LineBuffer;
+		static UINT32 NextFreeLine;
+
+		static Vector<PageInfo>::type PageBuffer;
+		static UINT32 NextFreePageInfo;
+
+		static UINT32 allocWord(bool spacer);
+		static UINT32 allocLine(TextData* textData);
+		static void deallocAll();
+
+		static void addCharToPage(UINT32 page, const FontData& fontData);
+
+
 	};
 }

+ 224 - 137
CamelotCore/Source/CmTextUtility.cpp

@@ -7,22 +7,28 @@ namespace CamelotFramework
 {
 	const int SPACE_CHAR = 32;
 
-	TextUtility::TextWord::TextWord(bool spacer)
-		:mWidth(0), mHeight(0), mSpacer(spacer), mSpaceWidth(0)
-	{ }
+	void TextUtility::TextWord::init(bool spacer)
+	{
+		mWidth = mHeight = 0;
+		mSpacer = spacer;
+		mSpaceWidth = 0;
+		mCharsStart = 0;
+		mCharsEnd = 0;
+		mLastChar = nullptr;
+	}
 
-	UINT32 TextUtility::TextWord::addChar(const CHAR_DESC& desc)
+	// Assumes charIdx is an index right after last char in the list (if any). All chars need to be sequential.
+	UINT32 TextUtility::TextWord::addChar(UINT32 charIdx, const CHAR_DESC& desc)
 	{
 		UINT32 charWidth = desc.xAdvance;
-		if(mChars.size() > 0)
+		if(mLastChar != nullptr)
 		{
 			UINT32 kerning = 0;
-			CHAR_DESC& prevChar = mChars.back();
-			for(size_t j = 0; j < prevChar.kerningPairs.size(); j++)
+			for(size_t j = 0; j < mLastChar->kerningPairs.size(); j++)
 			{
-				if(prevChar.kerningPairs[j].otherCharId == desc.charId)
+				if(mLastChar->kerningPairs[j].otherCharId == desc.charId)
 				{
-					kerning = prevChar.kerningPairs[j].amount;
+					kerning = mLastChar->kerningPairs[j].amount;
 					break;
 				}
 			}
@@ -33,7 +39,12 @@ namespace CamelotFramework
 		mWidth += charWidth;
 		mHeight = std::max(mHeight, desc.height);
 
-		mChars.push_back(desc);
+		if(mLastChar == nullptr) // First char
+			mCharsStart = mCharsEnd = charIdx;
+		else
+			mCharsEnd = charIdx;
+
+		mLastChar = &desc;
 
 		return charWidth;
 	}
@@ -45,15 +56,13 @@ namespace CamelotFramework
 		mHeight = 0;
 	}
 
-	TextUtility::TextLine::TextLine(UINT32 baselineOffset, UINT32 lineHeight, UINT32 spaceWidth)
-		:mWidth(0), mHeight(0), mLastWord(nullptr), mBaselineOffset(baselineOffset), 
-		mLineHeight(lineHeight), mSpaceWidth(spaceWidth)
-	{
-
-	}
-
-	TextUtility::TextLine::~TextLine()
+	void TextUtility::TextLine::init(TextData* textData)
 	{
+		mWidth = 0;
+		mHeight = 0;
+		mIsEmpty = true; 
+		mTextData = textData;
+		mWordsStart = mWordsEnd = 0;
 	}
 
 	void TextUtility::TextLine::finalize(bool hasNewlineChar)
@@ -61,100 +70,76 @@ namespace CamelotFramework
 		mHasNewline = hasNewlineChar;
 	}
 
-	void TextUtility::TextLine::add(const CHAR_DESC& charDesc)
+	void TextUtility::TextLine::add(UINT32 charIdx, const CHAR_DESC& charDesc)
 	{
 		UINT32 charWidth = 0;
-		if(mLastWord == nullptr)
+		if(mIsEmpty)
 		{
-			mWords.push_back(TextWord(false));
-			mLastWord = &mWords.back();
-
-			charWidth = mLastWord->addChar(charDesc);
+			mWordsStart = mWordsEnd = allocWord(false);
+			mIsEmpty = false;
 		}
 		else
 		{
-			if(mLastWord->isSpacer())
-			{
-				mWords.push_back(TextWord(false));
-				mLastWord = &mWords.back();
-			}
-
-			charWidth = mLastWord->addChar(charDesc);
+			if(TextUtility::WordBuffer[mWordsEnd].isSpacer())
+				mWordsEnd = allocWord(false);
 		}
 
+		TextWord& lastWord = TextUtility::WordBuffer[mWordsEnd];
+		charWidth = lastWord.addChar(charIdx, charDesc);
+
 		mWidth += charWidth;
-		mHeight = std::max(mHeight, mLastWord->getHeight());
+		mHeight = std::max(mHeight, lastWord.getHeight());
 	}
 
 	void TextUtility::TextLine::addSpace()
 	{
-		if(mLastWord == nullptr)
+		if(mIsEmpty)
 		{
-			mWords.push_back(TextWord(true));
-			mLastWord = &mWords.back();
-
-			mLastWord->addSpace(mSpaceWidth);
+			mWordsStart = mWordsEnd = allocWord(true);
+			mIsEmpty = false;
 		}
 		else
-		{
-			mWords.push_back(TextWord(true)); // Each space is counted as its own word, to make certain operations easier
-			mLastWord = &mWords.back();
+			mWordsEnd = allocWord(true); // Each space is counted as its own word, to make certain operations easier
 
-			mLastWord->addSpace(mSpaceWidth);
-		}
+		TextWord& lastWord = TextUtility::WordBuffer[mWordsEnd];
+		lastWord.addSpace(mTextData->getSpaceWidth());
 
-		mWidth += mSpaceWidth;
+		mWidth += mTextData->getSpaceWidth();
 	}
 
-	void TextUtility::TextLine::addWord(const TextWord& word)
+	// Assumes wordIdx is an index right after last word in the list (if any). All words need to be sequential.
+	void TextUtility::TextLine::addWord(UINT32 wordIdx, const TextWord& word)
 	{
-		mWords.push_back(word);
-		mLastWord = &mWords.back();
+		if(mIsEmpty)
+		{
+			mWordsStart = mWordsEnd = wordIdx;
+			mIsEmpty = false;
+		}
+		else
+			mWordsEnd = wordIdx;
 
 		mWidth += word.getWidth();
 		mHeight = std::max(mHeight, word.getHeight());
 	}
 
-	TextUtility::TextWord TextUtility::TextLine::removeLastWord()
+	UINT32 TextUtility::TextLine::removeLastWord()
 	{
-		if(mWords.size() == 0)
-			return nullptr;
-
-		TextWord word = mWords[mWords.size() - 1];
-		mWords.erase(mWords.end() - 1);
-
-		if(mWords.size() > 0)
-			mLastWord = &mWords[mWords.size() - 1];
-		else
-			mLastWord = nullptr;
-
-		calculateBounds();
-
-		return word;
-	}
-
-	Vector<UINT32>::type TextUtility::TextLine::getNumQuadsPerPage() const
-	{
-		Vector<UINT32>::type quadsPerPage;
-		for(auto wordIter = mWords.begin(); wordIter != mWords.end(); ++wordIter)
+		if(mIsEmpty)
 		{
-			if(!wordIter->isSpacer())
-			{
-				const Vector<CHAR_DESC>::type& chars = wordIter->getChars();
-				UINT32 kerning = 0;
-				for(auto charIter = chars.begin(); charIter != chars.end(); ++charIter)
-				{
-					if(charIter->page > (UINT32)quadsPerPage.size())
-						quadsPerPage.resize(charIter->page + 1);
+			assert(false);
+			return 0;
+		}
 
-					quadsPerPage[charIter->page]++;
-				}
-			}
-			else
-				quadsPerPage[0]++;
+		UINT32 lastWord = mWordsEnd--;
+		if(mWordsStart == lastWord)
+		{
+			mIsEmpty = true;
+			mWordsStart = mWordsEnd = 0;
 		}
 
-		return quadsPerPage;
+		calculateBounds();
+
+		return lastWord;
 	}
 
 	UINT32 TextUtility::TextLine::fillBuffer(UINT32 page, Vector2* vertices, Vector2* uvs, UINT32* indexes, UINT32 offset, UINT32 size) const
@@ -162,9 +147,11 @@ namespace CamelotFramework
 		UINT32 numQuads = 0;
 
 		UINT32 penX = 0;
-		for(auto wordIter = mWords.begin(); wordIter != mWords.end(); ++wordIter)
+		for(UINT32 i = mWordsStart; i <= mWordsEnd; i++)
 		{
-			if(wordIter->isSpacer())
+			const TextWord& word = mTextData->getWord(i);
+
+			if(word.isSpacer())
 			{
 				// We store invisible space quads in the first page. Even though they aren't needed
 				// for rendering and we could just leave an empty space, they are needed for intersection tests
@@ -178,9 +165,9 @@ namespace CamelotFramework
 					UINT32 curIndex = offset * 6;
 
 					vertices[curVert + 0] = Vector2((float)curX, (float)curY);
-					vertices[curVert + 1] = Vector2((float)(curX + mSpaceWidth), (float)curY);
-					vertices[curVert + 2] = Vector2((float)curX, (float)curY + (float)mLineHeight);
-					vertices[curVert + 3] = Vector2((float)(curX + mSpaceWidth), (float)curY + (float)mLineHeight);
+					vertices[curVert + 1] = Vector2((float)(curX + mTextData->getSpaceWidth()), (float)curY);
+					vertices[curVert + 2] = Vector2((float)curX, (float)curY + (float)mTextData->getLineHeight());
+					vertices[curVert + 3] = Vector2((float)(curX + mTextData->getSpaceWidth()), (float)curY + (float)mTextData->getLineHeight());
 
 					if(uvs != nullptr)
 					{
@@ -208,49 +195,51 @@ namespace CamelotFramework
 						CM_EXCEPT(InternalErrorException, "Out of buffer bounds. Buffer size: " + toString(size));
 				}
 
-				penX += mSpaceWidth;
+				penX += mTextData->getSpaceWidth();
 			}
 			else
 			{
-				const Vector<CHAR_DESC>::type& chars = wordIter->getChars();
 				UINT32 kerning = 0;
-				for(auto charIter = chars.begin(); charIter != chars.end(); ++charIter)
+				for(UINT32 j = word.getCharsStart(); j <= word.getCharsEnd(); j++)
 				{
-					INT32 curX = penX + charIter->xOffset;
-					INT32 curY = ((INT32)mBaselineOffset - charIter->yOffset);
+					const CHAR_DESC& curChar = mTextData->getChar(j);
+
+					INT32 curX = penX + curChar.xOffset;
+					INT32 curY = ((INT32) mTextData->getBaselineOffset() - curChar.yOffset);
 
-					penX += charIter->xAdvance + kerning;
+					penX += curChar.xAdvance + kerning;
 					
 					kerning = 0;
-					if((charIter + 1) != chars.end())
+					if((j + 1) <= word.getCharsEnd())
 					{
-						for(size_t j = 0; j < charIter->kerningPairs.size(); j++)
+						const CHAR_DESC& nextChar = mTextData->getChar(j + 1);
+						for(size_t j = 0; j < curChar.kerningPairs.size(); j++)
 						{
-							if(charIter->kerningPairs[j].otherCharId == (charIter + 1)->charId)
+							if(curChar.kerningPairs[j].otherCharId == nextChar.charId)
 							{
-								kerning = charIter->kerningPairs[j].amount;
+								kerning = curChar.kerningPairs[j].amount;
 								break;
 							}
 						}
 					}
 
-					if(charIter->page != page)
+					if(curChar.page != page)
 						continue;
 
 					UINT32 curVert = offset * 4;
 					UINT32 curIndex = offset * 6;
 
 					vertices[curVert + 0] = Vector2((float)curX, (float)curY);
-					vertices[curVert + 1] = Vector2((float)(curX + charIter->width), (float)curY);
-					vertices[curVert + 2] = Vector2((float)curX, (float)curY + (float)charIter->height);
-					vertices[curVert + 3] = Vector2((float)(curX + charIter->width), (float)curY + (float)charIter->height);
+					vertices[curVert + 1] = Vector2((float)(curX + curChar.width), (float)curY);
+					vertices[curVert + 2] = Vector2((float)curX, (float)curY + (float)curChar.height);
+					vertices[curVert + 3] = Vector2((float)(curX + curChar.width), (float)curY + (float)curChar.height);
 
 					if(uvs != nullptr)
 					{
-						uvs[curVert + 0] = Vector2(charIter->uvX, charIter->uvY);
-						uvs[curVert + 1] = Vector2(charIter->uvX + charIter->uvWidth, charIter->uvY);
-						uvs[curVert + 2] = Vector2(charIter->uvX, charIter->uvY + charIter->uvHeight);
-						uvs[curVert + 3] = Vector2(charIter->uvX + charIter->uvWidth, charIter->uvY + charIter->uvHeight);
+						uvs[curVert + 0] = Vector2(curChar.uvX, curChar.uvY);
+						uvs[curVert + 1] = Vector2(curChar.uvX + curChar.uvWidth, curChar.uvY);
+						uvs[curVert + 2] = Vector2(curChar.uvX, curChar.uvY + curChar.uvHeight);
+						uvs[curVert + 3] = Vector2(curChar.uvX + curChar.uvWidth, curChar.uvY + curChar.uvHeight);
 					}
 
 					if(indexes != nullptr)
@@ -278,12 +267,14 @@ namespace CamelotFramework
 	UINT32 TextUtility::TextLine::getNumChars() const
 	{
 		UINT32 numChars = 0;
-		for(auto& word : mWords)
+		for(UINT32 i = mWordsStart; i <= mWordsEnd; i++)
 		{
+			TextWord& word = TextUtility::WordBuffer[i];
+
 			if(word.isSpacer())
 				numChars++;
 			else
-				numChars += (UINT32)word.getChars().size();
+				numChars += (UINT32)word.getNumChars();
 		}
 
 		return numChars;
@@ -293,19 +284,68 @@ namespace CamelotFramework
 	{
 		mWidth = 0;
 		mHeight = 0;
-		for(auto iter = mWords.begin(); iter != mWords.end(); ++iter)
+
+		for(UINT32 i = mWordsStart; i <= mWordsEnd; i++)
 		{
-			mWidth += iter->getWidth();
-			mHeight = std::max(mHeight, iter->getHeight());
+			TextWord& word = TextUtility::WordBuffer[i];
+
+			mWidth += word.getWidth();
+			mHeight = std::max(mHeight, word.getHeight());
 		}
 	}
 
+	TextUtility::TextData::TextData(const HFont& font, INT32 baselineOffset, UINT32 lineHeight, UINT32 spaceWidth)
+		:mFont(font), mBaselineOffset(baselineOffset), mLineHeight(lineHeight), mSpaceWidth(spaceWidth), mChars(nullptr),
+		mNumChars(0), mWords(nullptr), mNumWords(0), mLines(nullptr), mNumLines(0), mPageInfos(nullptr), mNumPageInfos(0), mData(nullptr)
+	{
+	}
+
 	TextUtility::TextData::~TextData()
 	{
+		if(mData != nullptr)
+			cm_free(mData);
+	}
+
+	Vector<TextUtility::TextWord>::type TextUtility::WordBuffer = Vector<TextUtility::TextWord>::type(2000);
+	UINT32 TextUtility::NextFreeWord = 0;
+
+	Vector<TextUtility::TextLine>::type TextUtility::LineBuffer = Vector<TextUtility::TextLine>::type(500);
+	UINT32 TextUtility::NextFreeLine = 0;
+
+	Vector<TextUtility::PageInfo>::type TextUtility::PageBuffer = Vector<TextUtility::PageInfo>::type(20);
+	UINT32 TextUtility::NextFreePageInfo = 0;
+
+	UINT32 TextUtility::allocWord(bool spacer)
+	{
+		if(NextFreeWord >= WordBuffer.size())
+			WordBuffer.resize(WordBuffer.size() * 2);
+
+		WordBuffer[NextFreeWord].init(spacer);
+
+		return NextFreeWord++;
+	}
+
+	UINT32 TextUtility::allocLine(TextData* textData)
+	{
+		if(NextFreeLine >= LineBuffer.size())
+			LineBuffer.resize(LineBuffer.size() * 2);
+
+		LineBuffer[NextFreeLine].init(textData);
+
+		return NextFreeLine++;
+	}
+
+	void TextUtility::deallocAll()
+	{
+		NextFreeWord = 0;
+		NextFreeLine = 0;
+		NextFreePageInfo = 0;
 	}
 
 	std::shared_ptr<TextUtility::TextData> TextUtility::getTextData(const WString& text, const HFont& font, UINT32 fontSize, UINT32 width, UINT32 height, bool wordWrap)
 	{
+		// In order to reduce number of memory allocations algorithm first calculates data into temporary buffers and then copies the results
+		
 		const FontData* fontData = nullptr;
 		if(font != nullptr)
 		{
@@ -323,10 +363,9 @@ namespace CamelotFramework
 
 		bool widthIsLimited = width > 0;
 
-		std::shared_ptr<TextUtility::TextData> textData = cm_shared_ptr<TextData, PoolAlloc>();
-		textData->mLines.push_back(TextLine(fontData->fontDesc.baselineOffset, fontData->fontDesc.lineHeight, fontData->fontDesc.spaceWidth));
-		TextLine* curLine = &textData->mLines.back();
+		std::shared_ptr<TextUtility::TextData> textData = cm_shared_ptr<TextData, PoolAlloc>(font, fontData->fontDesc.baselineOffset, fontData->fontDesc.lineHeight, fontData->fontDesc.spaceWidth);
 
+		UINT32 curLineIdx = allocLine(textData.get());
 		UINT32 curHeight = fontData->fontDesc.lineHeight;
 		UINT32 charIdx = 0;
 
@@ -338,11 +377,14 @@ namespace CamelotFramework
 			UINT32 charId = text[charIdx];
 			const CHAR_DESC& charDesc = fontData->getCharDesc(charId);
 
+			TextLine& curLine = LineBuffer[curLineIdx];
+
 			if(text[charIdx] == '\n')
 			{
-				curLine->finalize(true);
-				textData->mLines.push_back(TextLine(fontData->fontDesc.baselineOffset, fontData->fontDesc.lineHeight, fontData->fontDesc.spaceWidth));
-				curLine = &textData->mLines.back();
+				curLine.finalize(true);
+
+				curLineIdx = allocLine(textData.get());
+				curLine = LineBuffer[curLineIdx];
 
 				curHeight += fontData->fontDesc.lineHeight;
 
@@ -352,62 +394,107 @@ namespace CamelotFramework
 
 			if(charId != SPACE_CHAR)
 			{
-				curLine->add(charDesc);
-				addCharToPage(*textData, charDesc.page, *fontData);
+				curLine.add(charIdx, charDesc);
+				addCharToPage(charDesc.page, *fontData);
 			}
 			else
 			{
-				curLine->addSpace();
-				addCharToPage(*textData, 0, *fontData);
+				curLine.addSpace();
+				addCharToPage(0, *fontData);
 			}
 
-			if(widthIsLimited && curLine->getWidth() > width)
+			if(widthIsLimited && curLine.getWidth() > width)
 			{
 				if(wordWrap)
 				{
-					TextWord lastWord = curLine->removeLastWord();
+					assert(!curLine.isEmpty());
+
+					UINT32 lastWordIdx = curLine.removeLastWord();
+					TextWord& lastWord = WordBuffer[lastWordIdx];
 
 					if(lastWord.getWidth() <= width) // If the word fits, attempt to add it to a new line
 					{
-						curLine->finalize(false);
-						textData->mLines.push_back(TextLine(fontData->fontDesc.baselineOffset, fontData->fontDesc.lineHeight, fontData->fontDesc.spaceWidth));
-						curLine = &textData->mLines.back();
+						curLine.finalize(false);
+
+						curLineIdx = allocLine(textData.get());
+						curLine = LineBuffer[curLineIdx];
 
 						curHeight += fontData->fontDesc.lineHeight;
 					}
 
-					curLine->addWord(lastWord);
+					curLine.addWord(lastWordIdx, lastWord);
 				}
 			}
 
 			charIdx++;
 		}
 
-		curLine->finalize(true);
+		LineBuffer[curLineIdx].finalize(true);
+
+		// Now that we have all the data we need, allocate the permanent buffers and copy the data
+		textData->mNumChars = (UINT32)text.size();
+		textData->mNumWords = NextFreeWord;
+		textData->mNumLines = NextFreeLine;
+		textData->mNumPageInfos = NextFreePageInfo;
+
+		UINT32 charArraySize = textData->mNumChars * sizeof(const CHAR_DESC*);
+		UINT32 wordArraySize = textData->mNumWords * sizeof(TextWord);
+		UINT32 lineArraySize = textData->mNumLines * sizeof(TextLine);
+		UINT32 pageInfoArraySize = textData->mNumPageInfos * sizeof(PageInfo);
+
+		UINT32 totalBufferSize = charArraySize + wordArraySize + lineArraySize + pageInfoArraySize;
+		textData->mData = cm_alloc(totalBufferSize);
+
+		UINT8* dataPtr = (UINT8*)textData->mData;
+		textData->mChars = (const CHAR_DESC**)dataPtr;
+
+		for(UINT32 i = 0; i < textData->mNumChars; i++)
+		{
+			UINT32 charId = text[i];
+			const CHAR_DESC& charDesc = fontData->getCharDesc(charId);
+
+			textData->mChars[i] = &charDesc;
+		}
+
+		dataPtr += charArraySize;
+		textData->mWords = (TextWord*)dataPtr;
+		memcpy(textData->mWords, &WordBuffer[0], wordArraySize);
+
+		dataPtr += wordArraySize;
+		textData->mLines = (TextLine*)dataPtr;
+		memcpy(textData->mLines, &LineBuffer[0], lineArraySize);
+
+		dataPtr += lineArraySize;
+		textData->mPageInfos = (PageInfo*)dataPtr;
+		memcpy(textData->mPageInfos, &PageBuffer[0], pageInfoArraySize);
+
+		TextUtility::deallocAll();
 
 		return textData;
 	}
 
-	void TextUtility::addCharToPage(TextUtility::TextData& data, UINT32 page, const FontData& fontData)
+	void TextUtility::addCharToPage(UINT32 page, const FontData& fontData)
 	{
-		if(page >= (UINT32)data.mQuadsPerPage.size())
+		while(page >= NextFreePageInfo)
 		{
-			data.mQuadsPerPage.resize(page + 1);
-			data.mTexturePages.resize(page + 1);
+			PageBuffer[NextFreePageInfo].numQuads = 0;
+			PageBuffer[NextFreePageInfo].texture = HTexture();
+
+			NextFreePageInfo++;
 		}
 
-		data.mQuadsPerPage[page]++;
+		PageBuffer[page].numQuads++;
 
-		if(data.mTexturePages[page] == nullptr)
-			data.mTexturePages[page] = fontData.texturePages[page];
+		if(PageBuffer[page].texture == nullptr)
+			PageBuffer[page].texture = fontData.texturePages[page];
 	}
 
 	UINT32 TextUtility::TextData::getWidth() const
 	{
 		UINT32 width = 0;
 
-		for(auto& line : mLines)
-			width = std::max(width, line.getWidth());
+		for(UINT32 i = 0; i < mNumLines; i++)
+			width = std::max(width, mLines[i].getWidth());
 
 		return width;
 	}
@@ -416,8 +503,8 @@ namespace CamelotFramework
 	{
 		UINT32 height = 0;
 
-		for(auto& line : mLines)
-			height += line.getHeight();
+		for(UINT32 i = 0; i < mNumLines; i++)
+			height += mLines[i].getHeight();
 
 		return height;
 	}

+ 5 - 33
TextOpts.txt

@@ -1,35 +1,7 @@
-TextUtility should just use a stack for all allocations. Once text has been generated, calculate actual memory required for non-transient data. Copy the persistent data into the newly allocated block and free the stack.
+Make sure to also update TextSprite and ImageSprite and anything else in UpdateMesh, then don't forget to find the issue that causes elements to get marked as dirty every single frame
 
-Consider an option that wouldn't require copying?
- - A pre-pass that only calculates line/word width/height, and char indexes
-    - Data gets allocated on the stack
- - Using the information I allocate a block on the heap, and just iterate over the data and fill the actual line/word data
+Multi-line text doesn't work, and input/selection possibly doesn't either
 
-
-All allocs that happens:
- TextData shared_ptr - to get around it I'd have to pool TextData - and that would still possibly cause an internal shared_ptr alloc to happen. Consider leaving this for last.
- Lines array - One array per text data
- Words array - One array per line
- Char array - One array per word
- Quads per page and texture pages array
-
-
- STEP 2 - Consider optimizing TextSprite and ImageSprite mesh generation, it also seems to be doing a lot of allocs
-
- TODO - Consider implementing a smarter pooling solution
-  - Pool stores a ptr to first free element
-  - Allocating an object returns the first free element, and updates the free ptr to the element that the first free element was pointing to
-  - Deallocating an object makes it the first free element, and its data is set to point to previous first free element
-  - Other
-    - Allocating a new block just creates a link with the previous block
-	- Allocating sequential arrays is not supported
-  - TODO - This won't actually work as imagined as I need to find proper block based on element address. MemPool in AllocatorTest does it really well.
-
-TODO - I really feel like allocations should be much faster. 100 000 allocations/frees using non-debug heap take about 30ms, while in my code that's the time it takes to execute 6000
- - Same code (CRT allocations & frees) executes about 30 times slower in Camelot than in a test project
- - And SetHeapInformation in Camelot fails, indicating that debug heap is active
-
- BansheeEngine x64 Release is being compiled as console
-
-Consider using VirtualAlloc, VirtualFree and see if that speeds up allocations. (Potentially Heaps API if Virtual is too complex)
- - Also see if threading affects heap performance
+		// TODO: Ensure that buffers in TextData actually handle multiple threads properly (THREADLOCAL?)
+		// TODO: Merge buffer allocations for shared_ptr and data into a single alloc?
+		// TODO: Pre-opt stats: 1500 allocs, 600 frees, 0.5ms